diff --git a/contrib/Makefile b/contrib/Makefile index da4e2316a3..721a58fcde 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -36,6 +36,7 @@ SUBDIRS = \ pg_prewarm \ pg_stat_statements \ pg_surgery \ + pg_tracing \ pg_trgm \ pgrowlocks \ pgstattuple \ diff --git a/contrib/meson.build b/contrib/meson.build index 84d4e18561..f73caf95f5 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -47,6 +47,7 @@ subdir('pgrowlocks') subdir('pg_stat_statements') subdir('pgstattuple') subdir('pg_surgery') +subdir('pg_tracing') subdir('pg_trgm') subdir('pg_visibility') subdir('pg_walinspect') diff --git a/contrib/pg_tracing/Makefile b/contrib/pg_tracing/Makefile new file mode 100644 index 0000000000..44398a7c44 --- /dev/null +++ b/contrib/pg_tracing/Makefile @@ -0,0 +1,28 @@ +# contrib/pg_tracing/Makefile + +MODULE_big = pg_tracing +OBJS = \ + $(WIN32RES) \ + pg_tracing.o \ + query_process.o \ + pg_tracing_explain.o \ + span.o + +EXTENSION = pg_tracing +DATA = pg_tracing--1.0.sql +TAP_TESTS = 0 +PGFILEDESC = "pg_tracing - Distributed tracing for postgres" + +REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_tracing/pg_tracing.conf +REGRESS = create_table utility select sample subxact reset nested wal parallel cleanup + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_tracing +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_tracing/expected/cleanup.out b/contrib/pg_tracing/expected/cleanup.out new file mode 100644 index 0000000000..ab11333d4e --- /dev/null +++ b/contrib/pg_tracing/expected/cleanup.out @@ -0,0 +1,3 @@ +DROP TABLE pg_tracing_test; +DROP function test_function; +DROP EXTENSION pg_tracing; diff --git a/contrib/pg_tracing/expected/create_table.out b/contrib/pg_tracing/expected/create_table.out new file mode 100644 index 0000000000..87875a113f --- /dev/null +++ b/contrib/pg_tracing/expected/create_table.out @@ -0,0 +1 @@ +CREATE TABLE pg_tracing_test (a int, b char(20)); diff --git a/contrib/pg_tracing/expected/nested.out b/contrib/pg_tracing/expected/nested.out new file mode 100644 index 0000000000..0a51c50938 --- /dev/null +++ b/contrib/pg_tracing/expected/nested.out @@ -0,0 +1,122 @@ +-- Create test functions +CREATE OR REPLACE FUNCTION test_function(a int) RETURNS SETOF oid AS +$BODY$ +BEGIN + RETURN QUERY SELECT oid from pg_class where oid = a; +END; +$BODY$ +LANGUAGE plpgsql; +CREATE OR REPLACE PROCEDURE sum_one(i int) AS $$ +DECLARE + r int; +BEGIN + SELECT (i + i)::int INTO r; +END; $$ LANGUAGE plpgsql; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000051-0000000000000051-01'*/ select test_function(1); + test_function +--------------- +(0 rows) + +SELECT span_id AS top_span, + extract(epoch from span_start) as top_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_end + from pg_tracing_spans(false) where parent_id=81 and name!='Parse' \gset +SELECT span_id AS top_run_span, + extract(epoch from span_start) as top_run_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_run_end + from pg_tracing_spans(false) where parent_id=:top_span and name='Executor' and resource='Run' \gset +SELECT span_id AS top_project, + extract(epoch from span_start) as top_project_start, + 
round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_project_end + from pg_tracing_spans(false) where parent_id=:top_run_span and name='ProjectSet' \gset +SELECT span_id AS top_result, + extract(epoch from span_start) as top_result_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_result_end + from pg_tracing_spans(false) where parent_id=:top_project and name='Result' \gset +SELECT span_id AS nested_select, + extract(epoch from span_start) as select_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as select_end + from pg_tracing_spans(false) where parent_id=:top_result and name='Select' \gset +SELECT span_id AS nested_run, + extract(epoch from span_start) as run_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as run_end + from pg_tracing_spans(false) where parent_id=:nested_select and resource='Run' \gset +SELECT :top_start < :top_run_start AS top_query_before_run, + :top_end >= :top_run_end AS top_end_after_run_end, + :top_run_start <= :top_project_start AS top_run_start_before_project, + :top_run_end >= :top_project_end AS top_run_end_after_project_end, + :top_run_end >= :select_end AS top_run_end_before_select_end, + :top_run_end >= :run_end AS top_run_end_after_nested_run_end, + :run_end >= :select_end AS run_end_after_select_end; + top_query_before_run | top_end_after_run_end | top_run_start_before_project | top_run_end_after_project_end | top_run_end_before_select_end | top_run_end_after_nested_run_end | run_end_after_select_end +----------------------+-----------------------+------------------------------+-------------------------------+-------------------------------+----------------------------------+-------------------------- + t | t | t | t | t | t | t +(1 row) + +WITH max_duration AS (select max(duration) from pg_tracing_spans(false)) +SELECT duration = max_duration.max from pg_tracing_spans(false), max_duration + where span_id = :top_span; + 
?column? +---------- + t +(1 row) + +SELECT resource from pg_tracing_spans(false) where parent_id=:nested_run order by resource; + resource +--------------------------------------------------------------------------------- + IndexOnlyScan using pg_class_oid_index on pg_class|Index Cond: (oid = '1'::oid) +(1 row) + +-- Check tracking option +set pg_tracing.track = 'top'; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000052-0000000000000052-01'*/ select test_function(1); + test_function +--------------- +(0 rows) + +SELECT count(*) from pg_tracing_spans where trace_id=82; + count +------- + 10 +(1 row) + +set pg_tracing.track = 'none'; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000053-0000000000000053-01'*/ select test_function(1); + test_function +--------------- +(0 rows) + +SELECT count(*) from pg_tracing_spans where trace_id=83; + count +------- + 0 +(1 row) + +set pg_tracing.track = 'all'; +/*traceparent='00-00000000000000000000000000000054-0000000000000054-01'*/ CALL sum_one(3); +SELECT resource from pg_tracing_spans order by span_start, duration desc, resource; + resource +--------------------- + CALL sum_one(3); + Parse + Post Parse + CALL sum_one(3); + SELECT (i + i)::int + Parse + Post Parse + Planner + Planner + Start + Run + Result + Finish + End +(14 rows) + +set pg_tracing.track_utility=off; +/*traceparent='00-00000000000000000000000000000055-0000000000000055-01'*/ CALL sum_one(10); +SELECT resource from pg_tracing_spans order by span_start, duration desc, resource; + resource +---------- +(0 rows) + diff --git a/contrib/pg_tracing/expected/parallel.out b/contrib/pg_tracing/expected/parallel.out new file mode 100644 index 0000000000..2f12f65be0 --- /dev/null +++ b/contrib/pg_tracing/expected/parallel.out @@ -0,0 +1,91 @@ +begin; +-- encourage use of parallel plans +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; 
+/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ select 1 from pg_class limit 1; + ?column? +---------- + 1 +(1 row) + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ select 2 from pg_class limit 2; + ?column? +---------- + 2 + 2 +(2 rows) + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-00'*/ select 3 from pg_class limit 3; + ?column? +---------- + 3 + 3 + 3 +(3 rows) + +commit; +SELECT resource from pg_tracing_spans order by resource; + resource +----------------------------------- + End + End + End + End + End + End + End + End + Finish + Finish + Finish + Finish + Finish + Finish + Finish + Finish + Gather + Gather + Limit + Limit + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parallel SeqScan on pg_class + Parse + Parse + Planner + Planner + Post Parse + Post Parse + Run + Run + Run + Run + Run + Run + Run + Run + Start + Start + Start + Start + Start + Start + Start + Start + Worker 0 + Worker 0 + Worker 1 + Worker 1 + Worker 2 + Worker 2 + select $1 from pg_class limit $2; + select $1 from pg_class limit $2; +(58 rows) + diff --git a/contrib/pg_tracing/expected/reset.out b/contrib/pg_tracing/expected/reset.out new file mode 100644 index 0000000000..2e0c903121 --- /dev/null +++ b/contrib/pg_tracing/expected/reset.out @@ -0,0 +1,13 @@ +-- Check reset is working +SELECT pg_tracing_reset(); + pg_tracing_reset +------------------ + +(1 row) + +SELECT traces from pg_tracing_info; + traces +-------- + 0 +(1 row) + diff --git a/contrib/pg_tracing/expected/sample.out b/contrib/pg_tracing/expected/sample.out new file mode 100644 index 0000000000..7275407e5d --- /dev/null +++ b/contrib/pg_tracing/expected/sample.out @@ -0,0 +1,215 @@ +-- Trace nothing +SET 
pg_tracing.sample_rate = 0.0; +SET pg_tracing.caller_sample_rate = 0.0; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000021-0000000000000021-01'*/ SELECT 1; + ?column? +---------- + 1 +(1 row) + +select count(distinct(trace_id)) from pg_tracing_spans(false); + count +------- + 0 +(1 row) + +select resource, parameters from pg_tracing_spans order by span_start, duration desc; + resource | parameters +----------+------------ +(0 rows) + +-- Trace everything +SET pg_tracing.sample_rate = 1.0; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000022-0000000000000022-01'*/ SELECT 1; + ?column? +---------- + 1 +(1 row) + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000023-0000000000000023-00'*/ SELECT 2; + ?column? +---------- + 2 +(1 row) + +SELECT 3; + ?column? +---------- + 3 +(1 row) + +SELECT 4; + ?column? +---------- + 4 +(1 row) + +select count(distinct(trace_id)) from pg_tracing_spans(false); + count +------- + 4 +(1 row) + +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; + resource | parameters +------------+------------ + SELECT $1; | $1 = 1 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | + SELECT $1; | $1 = 2 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | + SELECT $1; | $1 = 3 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | + SELECT $1; | $1 = 4 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | +(36 rows) + +-- Top spans should reuse generated ids +select resource, parameters from pg_tracing_spans(false) where trace_id = parent_id and parent_id = span_id order by span_start, duration desc; + resource | parameters +------------+------------ + SELECT $1; | $1 = 3 + SELECT $1; | $1 = 4 +(2 rows) + +select resource, parameters from pg_tracing_spans order by span_start, duration desc; + resource | parameters 
+------------+------------ + SELECT $1; | $1 = 1 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | + SELECT $1; | $1 = 2 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | + SELECT $1; | $1 = 3 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | + SELECT $1; | $1 = 4 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | +(36 rows) + +-- Cleanup +SET plan_cache_mode='auto'; +SET pg_tracing.sample_rate = 0.0; +-- Only trace query with sampled flag +SET pg_tracing.caller_sample_rate = 1.0; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000024-0000000000000024-01'*/ SELECT 1; + ?column? +---------- + 1 +(1 row) + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000025-0000000000000025-00'*/ SELECT 2; + ?column? +---------- + 2 +(1 row) + +SELECT 1; + ?column? +---------- + 1 +(1 row) + +select count(distinct(trace_id)) from pg_tracing_spans(false); + count +------- + 3 +(1 row) + +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; + resource | parameters +-----------------------------------+------------ + SET plan_cache_mode='auto'; | + Parse | + Post Parse | + SET plan_cache_mode='auto'; | + SET pg_tracing.sample_rate = 0.0; | + Parse | + Post Parse | + SET pg_tracing.sample_rate = 0.0; | + SELECT $1; | $1 = 1 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | +(17 rows) + +select resource, parameters from pg_tracing_spans order by span_start; + resource | parameters +-----------------------------------+------------ + SET plan_cache_mode='auto'; | + Parse | + Post Parse | + SET plan_cache_mode='auto'; | + SET pg_tracing.sample_rate = 0.0; | + Parse | + Post Parse | + SET pg_tracing.sample_rate = 0.0; | + SELECT $1; | $1 = 1 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | +(17 rows) + diff 
--git a/contrib/pg_tracing/expected/select.out b/contrib/pg_tracing/expected/select.out new file mode 100644 index 0000000000..206f0c03ed --- /dev/null +++ b/contrib/pg_tracing/expected/select.out @@ -0,0 +1,225 @@ +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ SELECT 1; + ?column? +---------- + 1 +(1 row) + +-- Get top span id +SELECT span_id AS top_span_id from pg_tracing_spans(false) where parent_id=1 and name!='Parse' \gset +-- Check parameters +SELECT parameters from pg_tracing_spans(false) where span_id=:top_span_id; + parameters +------------ + $1 = 1 +(1 row) + +-- Check the number of children +SELECT count(*) from pg_tracing_spans(false) where parent_id=:'top_span_id'; + count +------- + 7 +(1 row) + +-- Check resource and query id +SELECT resource, query_id from pg_tracing_spans(false) where trace_id=1 order by span_start, duration desc, resource; + resource | query_id +------------+--------------------- + SELECT $1; | 6865378226349601843 + Parse | 6865378226349601843 + Post Parse | 6865378226349601843 + Planner | 6865378226349601843 + Start | 6865378226349601843 + Run | 6865378226349601843 + Result | 6865378226349601843 + Finish | 6865378226349601843 + End | 6865378226349601843 +(9 rows) + +-- Check reported number of trace +SELECT traces from pg_tracing_info; + traces +-------- + 2 +(1 row) + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ SELECT * from current_database(); + current_database +----------------------- + regression_pg_tracing +(1 row) + +SELECT resource from pg_tracing_spans where trace_id=3 order by resource; + resource +----------------------------------- + End + Finish + FunctionScan on current_database + Parse + Planner + Post Parse + Run + SELECT * from current_database(); + Start +(9 rows) + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000004-0000000000000004-01'*/ SELECT s.relation_size + s.index_size +FROM (SELECT + 
pg_relation_size(C.oid) as relation_size, + pg_indexes_size(C.oid) as index_size + FROM pg_class C) as s limit 1; + ?column? +---------- + 0 +(1 row) + +SELECT resource from pg_tracing_spans where trace_id=4 order by resource; + resource +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + End + Finish + Limit + Parse + Planner + Post Parse + Run + SELECT s.relation_size + s.index_sizeFROM (SELECT pg_relation_size(C.oid) as relation_size, pg_indexes_size(C.oid) as index_size FROM pg_class C) as s limit $1; + SeqScan on pg_class c + Start + SubqueryScan on s +(11 rows) + +-- Check that we're in a correct state after a timeout +set statement_timeout=200; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000007-0000000000000007-01'*/ select * from pg_sleep(10); +ERROR: canceling statement due to statement timeout +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000008-0000000000000008-01'*/ select 1; + ?column? +---------- + 1 +(1 row) + +SELECT trace_id, resource, sql_error_code from pg_tracing_spans order by span_start, duration desc, resource; + trace_id | resource | sql_error_code +----------+-----------------------------+---------------- + 7 | select * from pg_sleep($1); | 57014 + 7 | Parse | 00000 + 7 | Post Parse | 00000 + 7 | Planner | 00000 + 7 | Start | 00000 + 7 | Run | 57014 + 7 | FunctionScan on pg_sleep | 57014 + 8 | select $1; | 00000 + 8 | Parse | 00000 + 8 | Post Parse | 00000 + 8 | Planner | 00000 + 8 | Start | 00000 + 8 | Run | 00000 + 8 | Result | 00000 + 8 | Finish | 00000 + 8 | End | 00000 +(16 rows) + +set statement_timeout=0; +-- Test prepared statement +PREPARE test_prepared (text, integer) AS /*$1*/ SELECT $2; +EXECUTE test_prepared('dddbs=''postgres.db'',traceparent=''00-00000000000000000000000000000009-0000000000000009-01''', 1); + ?column? 
+---------- + 1 +(1 row) + +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + trace_id | resource | parameters +----------+-----------------------------------------------------+---------------------------------------------------------------------------------------------------------------- + 9 | PREPARE test_prepared (text, integer) AS SELECT $2; | $1 = 'dddbs=''postgres.db'',traceparent=''00-00000000000000000000000000000009-0000000000000009-01''', $2 = '1' + 9 | Planner | + 9 | Start | + 9 | Run | + 9 | Result | + 9 | Finish | + 9 | End | +(7 rows) + +-- Check explain is untracked +explain (costs off) +select * from pg_class where oid < 0 limit 100; + QUERY PLAN +------------------------------------------------------- + Limit + -> Index Scan using pg_class_oid_index on pg_class + Index Cond: (oid < '0'::oid) +(3 rows) + +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + trace_id | resource | parameters +----------+----------+------------ +(0 rows) + +-- Never executed node +/*dddbs='postgres.db',traceparent='00-0000000000000000000000000000000a-000000000000000a-01'*/ select 1 limit 0; + ?column? +---------- +(0 rows) + +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + trace_id | resource | parameters +----------+---------------------+---------------- + 10 | select $1 limit $2; | $1 = 1, $2 = 0 + 10 | Parse | + 10 | Post Parse | + 10 | Planner | + 10 | Start | + 10 | Run | + 10 | Limit | + 10 | Finish | + 10 | End | +(9 rows) + +-- Test prepared statement with generic plan +SET plan_cache_mode='force_generic_plan'; +EXECUTE test_prepared('dddbs=''postgres.db'',traceparent=''00-00000000000000000000000000000010-0000000000000010-01''', 10); + ?column? 
+---------- + 10 +(1 row) + +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + trace_id | resource | parameters +----------+-----------------------------------------------------+------------ + 16 | PREPARE test_prepared (text, integer) AS SELECT $2; | + 16 | Start | + 16 | Run | + 16 | Result | + 16 | Finish | + 16 | End | +(6 rows) + +-- Test multiple statements +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000011-0000000000000012-01'*/ select 1; select 2; + ?column? +---------- + 1 +(1 row) + + ?column? +---------- + 2 +(1 row) + +select resource, parameters from pg_tracing_spans order by span_start, duration desc; + resource | parameters +------------+------------ + select $1; | $1 = 1 + Parse | + Post Parse | + Planner | + Start | + Run | + Result | + Finish | + End | +(9 rows) + +-- Cleanup +SET plan_cache_mode='auto'; +DEALLOCATE test_prepared; diff --git a/contrib/pg_tracing/expected/subxact.out b/contrib/pg_tracing/expected/subxact.out new file mode 100644 index 0000000000..69fe117a4f --- /dev/null +++ b/contrib/pg_tracing/expected/subxact.out @@ -0,0 +1,70 @@ +SET pg_tracing.sample_rate = 1.0; +BEGIN; +SAVEPOINT s1; +INSERT INTO pg_tracing_test VALUES(generate_series(1, 2), 'aaa'); +SAVEPOINT s2; +INSERT INTO pg_tracing_test VALUES(generate_series(1, 2), 'aaa'); +SAVEPOINT s3; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +COMMIT; +select resource, parameters, subxact_count from pg_tracing_spans order by span_start, duration desc; + resource | parameters | subxact_count +------------------------------------------------------------------+----------------------------+--------------- + BEGIN; | | 0 + Parse | | 0 + Post Parse | | 0 + BEGIN; | | 0 + SAVEPOINT $1; | $1 = s1 | 0 + Parse | | 0 + Post Parse | | 0 + SAVEPOINT s1; | | 0 + INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | $1 = 1, $2 = 2, $3 = 'aaa' | 0 + Parse | | 0 + Post Parse | | 0 + Planner | | 0 + Start | | 0 + Run | | 0 + Insert on pg_tracing_test | | 1 + ProjectSet | | 1 + Result | | 1 + Finish | | 1 + End | | 1 + SAVEPOINT $1; | $1 = s2 | 1 + Parse | | 1 + Post Parse | | 1 + SAVEPOINT s2; | | 1 + INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | $1 = 1, $2 = 2, $3 = 'aaa' | 1 + Parse | | 1 + Post Parse | | 1 + Planner | | 1 + Start | | 1 + Run | | 1 + Insert on pg_tracing_test | | 2 + ProjectSet | | 2 + Result | | 2 + Finish | | 2 + End | | 2 + SAVEPOINT $1; | $1 = s3 | 2 + Parse | | 2 + Post Parse | | 2 + SAVEPOINT s3; | | 2 + SELECT $1; | $1 = 1 | 2 + Parse | | 2 + Post Parse | | 2 + Planner | | 2 + Start | | 2 + Run | | 2 + Result | | 2 + Finish | | 2 + End | | 2 + COMMIT; | | 2 + Parse | | 2 + Post Parse | | 2 + COMMIT; | | 2 +(51 rows) + diff --git a/contrib/pg_tracing/expected/utility.out b/contrib/pg_tracing/expected/utility.out new file mode 100644 index 0000000000..4d7095eaac --- /dev/null +++ b/contrib/pg_tracing/expected/utility.out @@ -0,0 +1,118 @@ +-- Track statements in create extension +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ CREATE EXTENSION pg_tracing; +SELECT span_id AS top_utility, + extract(epoch from span_start) as top_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_utility_end + from pg_tracing_spans(false) where parent_id=1 and 
name='Utility' \gset +SELECT span_id AS top_process_utility, + extract(epoch from span_start) as top_process_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_process_utility_end + from pg_tracing_spans(false) where parent_id=:top_utility and name='ProcessUtility' \gset +SELECT span_id AS first_utility, + extract(epoch from span_start) as first_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as first_utility_end + from pg_tracing_spans(false) where parent_id=:top_process_utility and name='Utility' limit 1 \gset +SELECT span_id AS first_process_utility, + extract(epoch from span_start) as first_process_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as first_process_utility_end + from pg_tracing_spans(false) where parent_id=:first_utility and name='ProcessUtility' \gset +SELECT span_id AS second_utility, + extract(epoch from span_start) as second_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as second_utility_end + from pg_tracing_spans(false) where parent_id=:top_process_utility and name='Utility' limit 1 offset 1 \gset +SELECT :top_utility_start < :top_process_utility_start AS top_utility_start_before_process_utility, + :top_utility_end >= :top_process_utility_end AS top_end_after_process_utility, + :first_process_utility_end < :first_utility_end AS nested_process_utility_ends_before, + :first_utility_end < :second_utility_start AS second_utility_start_after_first_one; + top_utility_start_before_process_utility | top_end_after_process_utility | nested_process_utility_ends_before | second_utility_start_after_first_one +------------------------------------------+-------------------------------+------------------------------------+-------------------------------------- + t | t | t | t +(1 row) + +-- Clean current spans +select count(*) from pg_tracing_spans; + count +------- + 38 +(1 row) + +-- Test utility off +SET 
pg_tracing.track_utility = off; +-- Test prepared statement +PREPARE test_prepared_one_param (integer) AS SELECT $1; +EXECUTE test_prepared_one_param(100); + ?column? +---------- + 100 +(1 row) + +select count(*) from pg_tracing_spans; + count +------- + 0 +(1 row) + +-- Test prepared statement with generic plan +SET plan_cache_mode='force_generic_plan'; +EXECUTE test_prepared_one_param(200); + ?column? +---------- + 200 +(1 row) + +select count(*) from pg_tracing_spans; + count +------- + 0 +(1 row) + +-- With utility on +SET pg_tracing.track_utility = on; +-- Test prepared statement +PREPARE test_prepared_one_param_2 (integer) AS SELECT $1; +EXECUTE test_prepared_one_param_2(100); + ?column? +---------- + 100 +(1 row) + +select count(distinct(trace_id)) from pg_tracing_spans(false); + count +------- + 0 +(1 row) + +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; + resource | parameters +----------+------------ +(0 rows) + +select resource, parameters from pg_tracing_spans where trace_id = parent_id and parent_id = span_id order by span_start; + resource | parameters +----------+------------ +(0 rows) + +-- Test prepared statement with generic plan +SET plan_cache_mode='force_generic_plan'; +EXECUTE test_prepared_one_param(200); + ?column? 
+---------- + 200 +(1 row) + +select count(distinct(trace_id)) from pg_tracing_spans(false); + count +------- + 0 +(1 row) + +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; + resource | parameters +----------+------------ +(0 rows) + +select resource, parameters from pg_tracing_spans where trace_id = parent_id and parent_id = span_id order by span_start; + resource | parameters +----------+------------ +(0 rows) + diff --git a/contrib/pg_tracing/expected/wal.out b/contrib/pg_tracing/expected/wal.out new file mode 100644 index 0000000000..4d74aab625 --- /dev/null +++ b/contrib/pg_tracing/expected/wal.out @@ -0,0 +1,89 @@ +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ INSERT INTO pg_tracing_test VALUES(generate_series(1, 10), 'aaa'); +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ UPDATE pg_tracing_test SET b = 'bbb' WHERE a > 7; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ DELETE FROM pg_tracing_test WHERE a > 9; +-- Check WAL is generated for the above statements +SELECT trace_id, name, resource, +wal_records > 0 as wal_records, +wal_bytes > 0 as wal_bytes +FROM pg_tracing_spans order by span_start, duration desc, resource; + trace_id | name | resource | wal_records | wal_bytes +----------+------------+------------------------------------------------------------------+-------------+----------- + 1 | Insert | INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | t | t + 1 | Parse | Parse | | + 1 | Post Parse | Post Parse | | + 1 | Planner | Planner | f | f + 1 | Executor | Start | | + 1 | Executor | Run | | + 1 | Insert | Insert on pg_tracing_test | t | t + 1 | ProjectSet | ProjectSet | f | f + 1 | Result | Result | f | f + 1 | Executor | Finish | | + 1 | Executor | End | | + 2 | Update | UPDATE pg_tracing_test SET b = $1 WHERE a > $2; | t | t + 2 | Parse | Parse | 
| + 2 | Post Parse | Post Parse | | + 2 | Planner | Planner | f | f + 2 | Executor | Start | | + 2 | Executor | Run | | + 2 | Update | Update on pg_tracing_test | t | t + 2 | SeqScan | SeqScan on pg_tracing_test|Filter : (a > 7) | f | f + 2 | Executor | Finish | | + 2 | Executor | End | | + 3 | Delete | DELETE FROM pg_tracing_test WHERE a > $1; | t | t + 3 | Parse | Parse | | + 3 | Post Parse | Post Parse | | + 3 | Planner | Planner | f | f + 3 | Executor | Start | | + 3 | Executor | Run | | + 3 | Delete | Delete on pg_tracing_test | t | t + 3 | SeqScan | SeqScan on pg_tracing_test|Filter : (a > 9) | f | f + 3 | Executor | Finish | | + 3 | Executor | End | | +(31 rows) + +-- Redo the same but without wal instrumentation +set pg_tracing.instrument_wal = false; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ INSERT INTO pg_tracing_test VALUES(generate_series(1, 10), 'aaa'); +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ UPDATE pg_tracing_test SET b = 'bbb' WHERE a > 7; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ DELETE FROM pg_tracing_test WHERE a > 9; +SELECT trace_id, name, resource, +wal_records = 0 as wal_records, +wal_bytes = 0 as wal_bytes +FROM pg_tracing_spans order by span_start, duration desc, resource; + trace_id | name | resource | wal_records | wal_bytes +----------+------------+------------------------------------------------------------------+-------------+----------- + 1 | Insert | INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | t | t + 1 | Parse | Parse | | + 1 | Post Parse | Post Parse | | + 1 | Planner | Planner | t | t + 1 | Executor | Start | | + 1 | Executor | Run | | + 1 | Insert | Insert on pg_tracing_test | t | t + 1 | ProjectSet | ProjectSet | t | t + 1 | Result | Result | t | t + 1 | Executor | Finish | | + 1 | Executor | End | | + 2 | Update | UPDATE pg_tracing_test SET b = $1 
WHERE a > $2; | t | t + 2 | Parse | Parse | | + 2 | Post Parse | Post Parse | | + 2 | Planner | Planner | t | t + 2 | Executor | Start | | + 2 | Executor | Run | | + 2 | Update | Update on pg_tracing_test | t | t + 2 | SeqScan | SeqScan on pg_tracing_test|Filter : (a > 7) | t | t + 2 | Executor | Finish | | + 2 | Executor | End | | + 3 | Delete | DELETE FROM pg_tracing_test WHERE a > $1; | t | t + 3 | Parse | Parse | | + 3 | Post Parse | Post Parse | | + 3 | Planner | Planner | t | t + 3 | Executor | Start | | + 3 | Executor | Run | | + 3 | Delete | Delete on pg_tracing_test | t | t + 3 | SeqScan | SeqScan on pg_tracing_test|Filter : (a > 9) | t | t + 3 | Executor | Finish | | + 3 | Executor | End | | +(31 rows) + +-- Cleanup +set pg_tracing.instrument_wal = true; diff --git a/contrib/pg_tracing/meson.build b/contrib/pg_tracing/meson.build new file mode 100644 index 0000000000..c3afd256fe --- /dev/null +++ b/contrib/pg_tracing/meson.build @@ -0,0 +1,47 @@ +# Copyright (c) 2022-2023, PostgreSQL Global Development Group + +pg_tracing_sources = files( + 'pg_tracing.c', + 'query_process.c', + 'pg_tracing_explain.c', + 'span.c', +) + +if host_system == 'windows' + pg_tracing_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'pg_tracing', + '--FILEDESC', 'pg_tracing - Add support for distributed tracing',]) +endif + +pg_tracing = shared_module('pg_tracing', + pg_tracing_sources, + kwargs: contrib_mod_args, +) +contrib_targets += pg_tracing + +install_data( + 'pg_tracing.control', + 'pg_tracing--1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'pg_tracing', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'create_table', + 'utility', + 'select', + 'sample', + 'subxact', + 'reset', + 'nested', + 'wal', + 'parallel', + 'cleanup' + ], + 'regress_args': ['--temp-config', files('pg_tracing.conf')], + }, +} diff --git a/contrib/pg_tracing/pg_tracing--1.0.sql 
b/contrib/pg_tracing/pg_tracing--1.0.sql new file mode 100644 index 0000000000..401fd75f39 --- /dev/null +++ b/contrib/pg_tracing/pg_tracing--1.0.sql @@ -0,0 +1,94 @@ +/* contrib/pg_tracing/pg_tracing--1.0.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "CREATE EXTENSION pg_tracing" to load this file. \quit + +--- Define pg_tracing_info +CREATE FUNCTION pg_tracing_info( + OUT traces bigint, + OUT spans bigint, + OUT dropped_spans bigint, + OUT failed_truncates bigint, + OUT last_consume timestamp with time zone, + OUT stats_reset timestamp with time zone, + OUT sampling_rate double precision +) +RETURNS record +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION pg_tracing_reset() +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +CREATE FUNCTION pg_tracing_spans( + IN consume bool, + OUT trace_id bigint, + OUT parent_id bigint, + OUT span_id bigint, + OUT query_id bigint, + OUT name text, + OUT resource text, + OUT span_start timestamp with time zone, + OUT duration bigint, + OUT sql_error_code character(5), + OUT pid int4, + OUT nested_level smallint, + OUT subxact_count smallint, + OUT top_span bool, + +-- Plan counters + OUT startup_cost float8, + OUT total_cost float8, + OUT plan_rows float8, + OUT plan_width int, + +-- Node Counters + OUT rows int8, + OUT nloops int8, + + OUT shared_blks_hit int8, + OUT shared_blks_read int8, + OUT shared_blks_dirtied int8, + OUT shared_blks_written int8, + + OUT local_blks_hit int8, + OUT local_blks_read int8, + OUT local_blks_dirtied int8, + OUT local_blks_written int8, + + OUT blk_read_time float8, + OUT blk_write_time float8, + + OUT temp_blks_read int8, + OUT temp_blks_written int8, + OUT temp_blk_read_time float8, + OUT temp_blk_write_time float8, + + OUT wal_records int8, + OUT wal_fpi int8, + OUT wal_bytes numeric, + + OUT jit_functions int8, + OUT jit_generation_time float8, + OUT jit_inlining_time float8, + OUT 
jit_optimization_time float8, + OUT jit_emission_time float8, + +-- SpanNode specific data + OUT startup bigint, -- First tuple + OUT parameters text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +CREATE VIEW pg_tracing_info AS + SELECT * FROM pg_tracing_info(); + +CREATE VIEW pg_tracing_spans AS + SELECT * FROM pg_tracing_spans(true); + +GRANT SELECT ON pg_tracing_info TO PUBLIC; +GRANT SELECT ON pg_tracing_spans TO PUBLIC; diff --git a/contrib/pg_tracing/pg_tracing.c b/contrib/pg_tracing/pg_tracing.c new file mode 100644 index 0000000000..22b8ca3041 --- /dev/null +++ b/contrib/pg_tracing/pg_tracing.c @@ -0,0 +1,2326 @@ +/*------------------------------------------------------------------------- + * + * pg_tracing.c + * Generate spans for distributed tracing from SQL query + * + * We rely on the caller to know whether the query needs to be traced or not. + * The information is propagated through https://google.github.io/sqlcommenter/. + * + * A query with sqlcommenter will look like: /\*dddbs='postgres.db',traceparent='00-00000000000000000000000000000009-0000000000000005-01'*\/ select 1; + * The traceparent fields are detailed in https://www.w3.org/TR/trace-context/#traceparent-header-field-values + * 00000000000000000000000000000009: trace id + * 0000000000000005: parent id + * 01: trace flags (01 == sampled) + * + * If sampled is set in the trace flags, we will generate spans for the ongoing query. + * A span represents an operation with a start and a duration. + * We will track the following operations: + * - Top Span: The top span for a query. They are created after extracting the traceid from traceparent or to represent a nested query. + * - Planner: We track the time spent in the planner and report the planner counters + * - Node Span: Created from planstate. 
The name is extracted from the node type (IndexScan, SeqScan), + * - Executor: We trace the different steps of the Executor: Start, Run, Finish and End + * + * A typical traced query will generate the following spans: + * +------------------------------------------------------------------------------------------------------------------------------------------------------+ + * | Name: Select | + * | Operation: Select * pgbench_accounts WHERE aid=$1; | + * +---+------------------------+-+------------------+--+------------------------------------------------------+-+--------------------+-+----------------++ + * | Name: Planner | | Name: Executor | |Name: Executor | | Name: Executor | | Name: Executor | + * | Operation: Planner | | Operation: Start | |Operation: Run | | Operation: Finish | | Operation: End | + * +------------------------+ +------------------+ +--+--------------------------------------------------++ +--------------------+ +----------------+ + * | Name: IndexScan | + * | Operation: IndexScan using pgbench_accounts_pkey | + * | on pgbench_accounts | + * +--------------------------------------------------+ + * + * IDENTIFICATION + * contrib/pg_tracing/pg_tracing.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pg_tracing_explain.h" +#include "pg_tracing.h" +#include "query_process.h" +#include "span.h" + +#include "utils/ruleutils.h" +#include "access/xact.h" +#include "common/pg_prng.h" +#include "funcapi.h" +#include "nodes/extensible.h" +#include "optimizer/planner.h" +#include "parser/analyze.h" +#include "storage/ipc.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/xid8.h" + +PG_MODULE_MAGIC; + +typedef enum +{ + PG_TRACING_TRACK_NONE, /* track no statements */ + PG_TRACING_TRACK_TOP, /* only top level statements */ + PG_TRACING_TRACK_ALL /* all statements, including nested ones */ +} pgTracingTrackLevel; + +/* + * Structure 
to store flexible array of spans + */ +typedef struct pgTracingSpans +{ + int end; /* Index of last element */ + int max; /* Maximum number of element */ + Span spans[FLEXIBLE_ARRAY_MEMBER]; +} pgTracingSpans; + +/* + * A trace context for a specific parallel context + */ +typedef struct pgTracingParallelContext +{ + BackendId leader_backend_id; /* Backend id of the leader, set to + * InvalidBackendId if unused */ + uint64 trace_id; /* Trace id of the sampled query */ + uint64 parent_id; /* Id of the parent Gather span */ + uint64 query_id; /* Query id from the parent worker */ + pgTracingStartTime start_trace; + +} pgTracingParallelContext; + +/* + * Store context for parallel workers + */ +typedef struct pgTracingParallelWorkers +{ + slock_t mutex; + pgTracingParallelContext trace_contexts[FLEXIBLE_ARRAY_MEMBER]; +} pgTracingParallelWorkers; + +/* GUC variables */ +static int pg_tracing_max_span; /* Maximum number of spans to store */ +static int pg_tracing_max_parameter_str; /* Maximum number of spans to + * store */ +static int pg_tracing_max_traced_parallel_workers; /* Maximum number of + * parallel workers traced */ +static int pg_tracing_initial_allocated_spans = 25; /* Number of spans + * allocated at the + * start of a trace. 
*/ +static bool pg_tracing_instrument_buffers; /* Enable buffers instrumentation */ +static bool pg_tracing_instrument_wal; /* Enable WAL instrumentation */ +static double pg_tracing_sample_rate = 0; /* Sample rate applied to queries + * without SQLCommenter */ +static double pg_tracing_caller_sample_rate = 1; /* Sample rate applied to + * queries with + * SQLCommenter */ +static int pg_tracing_track = PG_TRACING_TRACK_ALL; /* tracking level */ +static bool pg_tracing_track_utility = true; /* whether to track utility + * commands */ +static bool pg_tracing_drop_full_buffer = true; /* whether to drop buffered + * spans when full */ + + +int64 nested_query_start_ns = 0; + +static const struct config_enum_entry track_options[] = +{ + {"none", PG_TRACING_TRACK_NONE, false}, + {"top", PG_TRACING_TRACK_TOP, false}, + {"all", PG_TRACING_TRACK_ALL, false}, + {NULL, 0, false} +}; + +#define pg_tracing_enabled(level) \ + ((pg_tracing_track == PG_TRACING_TRACK_ALL || \ + (pg_tracing_track == PG_TRACING_TRACK_TOP && (level) == 0))) + +PG_FUNCTION_INFO_V1(pg_tracing_info); +PG_FUNCTION_INFO_V1(pg_tracing_spans); +PG_FUNCTION_INFO_V1(pg_tracing_reset); + +/* + * Global variables + */ + +/* Memory context for pg_tracing. */ +static MemoryContext pg_tracing_mem_ctx; + +/* Current trace state */ +struct pgTracingTrace current_trace = {}; + +/* Shared state with stats and file external state */ +static pgTracingSharedState * pg_tracing = NULL; + +/* + * Shared buffer storing spans. Query with sampled flag will add new spans to + * the shared state. Those spans will be consumed during calls to + * pg_tracing_spans + */ +static pgTracingSpans * shared_spans = NULL; + +/* + * Shared buffer storing trace context for parallel workers. 
+ */ +static pgTracingParallelWorkers * pg_tracing_parallel = NULL; + +/* Index of the parallel worker context shared buffer if any */ +static int parallel_context_index = -1; + +/* +* Text for spans are buffered in this +* stringinfo and written at the end of the tracing in a single write. +*/ +static StringInfo current_trace_text; + +/* Start timestamp of the last executor finish */ +static TimestampTz executor_finish_start_ts; + +/* + * Maximum nested level for a query to know how many top spans we need to + * copy in shared_spans + */ +static int max_nested_level = -1; + +/* Current nesting depth of planner calls */ +static int plan_nested_level = 0; + +/* Current nesting depth of ExecutorRun+ProcessUtility calls */ +static int exec_nested_level = 0; + +/* Number of allocated levels. */ +static int allocated_nested_level = 0; + +/* + * Per nested level buffers + */ +typedef struct pgTracingPerLevelBuffer +{ + int top_span_index; /* Index of the top span in the + * current_trace_spans buffer */ + int parse_span_index; /* Index of the latest parse span in the + * current_trace_spans */ + uint64 executor_id; /* Span id of executor run spans by nested + * level Executor run is used as parent for + * spans generated from planstate */ + uint64_t query_id; /* Query id by nested level when available */ +} pgTracingPerLevelBuffer; + +static pgTracingPerLevelBuffer * per_level_buffers; + +/* + * Store all spans for the curent trace except top spans and parse spans. + * They will be added to the shared buffer at the end of query tracing. + * + * This buffer can be dynamically resized so care needs to be taken when using span pointers. + */ +static pgTracingSpans * current_trace_spans; + +/* + * Timestamp of the latest statement checked for sampling. 
+ */ +static TimestampTz last_statement_check_for_sampling = 0; + +static void pg_tracing_shmem_request(void); +static void pg_tracing_shmem_startup(void); + +/* Saved hook values in case of unload */ +static shmem_request_hook_type prev_shmem_request_hook = NULL; +static shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL; +static planner_hook_type prev_planner_hook = NULL; +static ExecutorStart_hook_type prev_ExecutorStart = NULL; +static ExecutorRun_hook_type prev_ExecutorRun = NULL; +static ExecutorFinish_hook_type prev_ExecutorFinish = NULL; +static ExecutorEnd_hook_type prev_ExecutorEnd = NULL; +static ProcessUtility_hook_type prev_ProcessUtility = NULL; + +static void pg_tracing_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate); +static PlannedStmt *pg_tracing_planner_hook(Query *parse, + const char *query_string, + int cursorOptions, + ParamListInfo params); +static void pg_tracing_ExecutorStart(QueryDesc *queryDesc, int eflags); +static void pg_tracing_ExecutorRun(QueryDesc *queryDesc, + ScanDirection direction, + uint64 count, bool execute_once); +static void pg_tracing_ExecutorFinish(QueryDesc *queryDesc); +static void pg_tracing_ExecutorEnd(QueryDesc *queryDesc); +static void pg_tracing_ProcessUtility(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, + ProcessUtilityContext context, ParamListInfo params, + QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc); + +static void generate_member_nodes(PlanState **planstates, int nplans, planstateTraceContext * planstateTraceContext, uint64 parent_id); +static void generate_span_from_planstate(PlanState *planstate, planstateTraceContext * planstateTraceContext, uint64 parent_id); + +#define PG_TRACING_INFO_COLS 6 +#define PG_TRACING_TRACES_COLS 43 + +/* + * Module load callback + */ +void +_PG_init(void) +{ + if (!process_shared_preload_libraries_in_progress) + return; + + 
DefineCustomIntVariable("pg_tracing.max_span", + "Maximum number of spans stored in shared memory.", + NULL, + &pg_tracing_max_span, + 1000, + 0, + 500000, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("pg_tracing.max_parameter_size", + "Maximum size of parameters. -1 to disable parameter in top span.", + NULL, + &pg_tracing_max_parameter_str, + 1024, + 0, + 10000, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_tracing.instrument_buffers", + "Instrument and add buffers usage in spans.", + NULL, + &pg_tracing_instrument_buffers, + true, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_tracing.instrument_wal", + "Instrument and add WAL usage in spans.", + NULL, + &pg_tracing_instrument_wal, + true, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("pg_tracing.max_traced_parallel_workers", + "Maximum number of parallel workers traced. -1 to disable tracing of parallel workers.", + NULL, + &pg_tracing_max_traced_parallel_workers, + 100, + 0, + 10000, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("pg_tracing.initial_allocated_spans", + "The number of allocated spans at the start of a trace. 
A higher number can help avoid reallocation when extending the buffer.", + NULL, + &pg_tracing_initial_allocated_spans, + 25, + 0, + 500, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomEnumVariable("pg_tracing.track", + "Selects which statements are tracked by pg_tracing.", + NULL, + &pg_tracing_track, + PG_TRACING_TRACK_ALL, + track_options, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_tracing.track_utility", + "Selects whether utility commands are tracked by pg_tracing.", + NULL, + &pg_tracing_track_utility, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_tracing.drop_on_full_buffer", + "Selects whether buffered spans should be dropped when buffer is full.", + NULL, + &pg_tracing_drop_full_buffer, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomRealVariable("pg_tracing.sample_rate", + "Fraction of queries to process.", + NULL, + &pg_tracing_sample_rate, + 0.0, + 0.0, + 1.0, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomRealVariable("pg_tracing.caller_sample_rate", + "Fraction of queries with sampled flag to process.", + NULL, + &pg_tracing_caller_sample_rate, + 1.0, + 0.0, + 1.0, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + MarkGUCPrefixReserved("pg_tracing"); + + /* For jumble state */ + EnableQueryId(); + + /* Install hooks. 
*/ + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = pg_tracing_shmem_request; + + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = pg_tracing_shmem_startup; + + prev_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = pg_tracing_post_parse_analyze; + + prev_planner_hook = planner_hook; + planner_hook = pg_tracing_planner_hook; + + prev_ExecutorStart = ExecutorStart_hook; + ExecutorStart_hook = pg_tracing_ExecutorStart; + prev_ExecutorRun = ExecutorRun_hook; + ExecutorRun_hook = pg_tracing_ExecutorRun; + prev_ExecutorFinish = ExecutorFinish_hook; + ExecutorFinish_hook = pg_tracing_ExecutorFinish; + prev_ExecutorEnd = ExecutorEnd_hook; + ExecutorEnd_hook = pg_tracing_ExecutorEnd; + + prev_ProcessUtility = ProcessUtility_hook; + ProcessUtility_hook = pg_tracing_ProcessUtility; +} + +/* + * Estimate shared memory space needed. + */ +static Size +pg_tracing_memsize(void) +{ + Size size; + + /* pg_tracing shared state */ + size = sizeof(pgTracingSharedState); + /* span struct */ + size = add_size(size, sizeof(pgTracingSpans)); + /* the span variable array */ + size = add_size(size, mul_size(pg_tracing_max_span, sizeof(Span))); + /* and the parallel workers context */ + size = add_size(size, mul_size(pg_tracing_max_traced_parallel_workers, sizeof(pgTracingParallelContext))); + + return size; +} + +/* + * shmem_request hook: request additional shared resources. We'll allocate + * or attach to the shared resources in pgss_shmem_startup(). + */ +static void +pg_tracing_shmem_request(void) +{ + if (prev_shmem_request_hook) + prev_shmem_request_hook(); + RequestAddinShmemSpace(pg_tracing_memsize()); +} + +/* + * Add current trace context to the parallel workers shared buffer. 
+ */ +static void +add_parallel_context(uint64 parent_id, uint64 query_id) +{ + volatile pgTracingParallelWorkers *p = (volatile pgTracingParallelWorkers *) pg_tracing_parallel; + + if (pg_tracing_max_traced_parallel_workers <= 0) + /* No tracing of parallel workers */ + return; + + Assert(parallel_context_index == -1); + SpinLockAcquire(&p->mutex); + for (int i = 0; i < pg_tracing_max_traced_parallel_workers; i++) + { + volatile pgTracingParallelContext *ctx = p->trace_contexts + i; + + Assert(ctx->leader_backend_id != MyBackendId); + if (ctx->leader_backend_id != InvalidBackendId) + continue; + /* Slot is available */ + parallel_context_index = i; + ctx->leader_backend_id = MyBackendId; + ctx->trace_id = current_trace.trace_id; + ctx->parent_id = parent_id; + ctx->query_id = query_id; + ctx->start_trace = current_trace.start_trace; + break; + } + SpinLockRelease(&p->mutex); +} + +/* + * Return a slot from the current_trace_spans buffer. + * current_trace_spans may be extended if needed. + */ +static int +get_index_from_trace_spans(void) +{ + if (current_trace_spans->end + 1 >= current_trace_spans->max) + { + MemoryContext oldcxt; + + /* Need to extend. */ + /* TODO: Have a configurable limit? 
*/ + current_trace_spans->max *= 2; + oldcxt = MemoryContextSwitchTo(pg_tracing_mem_ctx); + current_trace_spans = repalloc(current_trace_spans, current_trace_spans->max * sizeof(Span)); + MemoryContextSwitchTo(oldcxt); + } + return current_trace_spans->end++; +} + +/* + * Get current_span from provided index + */ +static Span * +get_span_from_index(int span_index) +{ + Assert(span_index < current_trace_spans->end); + return ¤t_trace_spans->spans[span_index]; +} + +/* + * Get top span for the provided nested level + */ +static Span * +get_top_span_for_nested_level(int nested_level) +{ + Span *top_span; + + Assert(nested_level <= max_nested_level); + if (per_level_buffers[nested_level].top_span_index == -1) + per_level_buffers[nested_level].top_span_index = get_index_from_trace_spans(); + top_span = get_span_from_index(per_level_buffers[nested_level].top_span_index); + return top_span; +} + +/* + * Set top span to the provided span index + */ +static void +set_top_span(int nested_level, int top_span_index) +{ + Assert(nested_level <= max_nested_level); + Assert(top_span_index < current_trace_spans->max); + per_level_buffers[nested_level].top_span_index = top_span_index; +} + +/* + * Get parse span for the provided nested level + */ +static Span * +get_parse_span_from_nested_level(int nested_level, bool force_allocation) +{ + Span *parse_span; + + if (force_allocation && per_level_buffers[nested_level].parse_span_index != -1) + { + parse_span = get_span_from_index(per_level_buffers[nested_level].parse_span_index); + Assert(parse_span->ended == true); + } + if (force_allocation || per_level_buffers[nested_level].parse_span_index == -1) + per_level_buffers[nested_level].parse_span_index = get_index_from_trace_spans(); + parse_span = get_span_from_index(per_level_buffers[nested_level].parse_span_index); + return parse_span; +} + +/* + * End the trace of the span at the provided index + */ +static void +end_span_index(int span_index, const int64 *end_span_input) +{ + Span 
*span; + + if (span_index < 0) + return; + span = get_span_from_index(span_index); + end_span(span, end_span_input); +} + +/* + * End top span for the provided nested level + */ +static void +end_top_span(int nested_level, const int64 *end_span_ns) +{ + Assert(nested_level <= max_nested_level); + Assert(per_level_buffers[nested_level].top_span_index < current_trace_spans->max); + Assert(per_level_buffers[nested_level].top_span_index != -1); + end_span_index(per_level_buffers[nested_level].top_span_index, end_span_ns); +} + +/* + * Remove parallel context for the current leader from the shared memory. + */ +static void +remove_parallel_context(void) +{ + if (parallel_context_index < 0) + /* No tracing of parallel workers */ + return; + { + volatile pgTracingParallelWorkers *p = (volatile pgTracingParallelWorkers *) pg_tracing_parallel; + + SpinLockAcquire(&p->mutex); + p->trace_contexts[parallel_context_index].leader_backend_id = InvalidBackendId; + SpinLockRelease(&p->mutex); + } + parallel_context_index = -1; +} + +/* + * If we're inside a parallel worker, check if the trace context is stored in shared memory. + * If a trace context exists, it means that the query is sampled and worker tracing is enabled. 
+ */ +static void +fetch_parallel_context(pgTracingTrace * trace) +{ + volatile pgTracingParallelWorkers *p = (volatile pgTracingParallelWorkers *) pg_tracing_parallel; + + if (pg_tracing_max_traced_parallel_workers <= 0) + { + /* No tracing of parallel workers */ + return; + } + SpinLockAcquire(&p->mutex); + for (int i = 0; i < pg_tracing_max_traced_parallel_workers; i++) + { + if (p->trace_contexts[i].leader_backend_id != ParallelLeaderBackendId) + continue; + /* Found a matching a trace context, fetch it */ + trace->trace_id = p->trace_contexts[i].trace_id; + trace->parent_id = p->trace_contexts[i].parent_id; + trace->start_trace = p->trace_contexts[i].start_trace; + trace->query_id = p->trace_contexts[i].query_id; + trace->sampled = true; + } + SpinLockRelease(&p->mutex); +} + +/* + * shmem_startup hook: allocate or attach to shared memory, Also create and + * load the query-texts file, which is expected to exist (even if empty) + * while the module is enabled. + */ +static void +pg_tracing_shmem_startup(void) +{ + bool found_pg_tracing; + bool found_shared_spans; + bool found_parallel; + + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + + /* reset in case this is a restart within the postmaster */ + pg_tracing = NULL; + + /* Create or attach to the shared memory state */ + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + + pg_tracing = ShmemInitStruct("PgTracing Shared", sizeof(pgTracingSharedState), &found_pg_tracing); + shared_spans = ShmemInitStruct("PgTracing Spans", + sizeof(pgTracingSpans) + pg_tracing_max_span * sizeof(Span), + &found_shared_spans); + pg_tracing_parallel = ShmemInitStruct("PgTracing Parallel Workers Context", + sizeof(pgTracingParallelWorkers) + pg_tracing_max_traced_parallel_workers * sizeof(pgTracingParallelContext), + &found_parallel); + + /* Initialize pg_tracing memory context */ + pg_tracing_mem_ctx = AllocSetContextCreate(TopMemoryContext, + "pg_tracing memory context", + ALLOCSET_DEFAULT_SIZES); + + /* First time, 
let's init shared state */ + if (!found_pg_tracing) + { + pg_tracing->stats.traces = 0; + pg_tracing->stats.stats_reset = GetCurrentTimestamp(); + SpinLockInit(&pg_tracing->mutex); + SpinLockInit(&pg_tracing_parallel->mutex); + } + if (!found_shared_spans) + { + shared_spans->end = 0; + shared_spans->max = pg_tracing_max_span; + } + if (!found_parallel) + { + for (int i = 0; i < pg_tracing_max_traced_parallel_workers; i++) + pg_tracing_parallel->trace_contexts[i].leader_backend_id = InvalidBackendId; + } + LWLockRelease(AddinShmemInitLock); +} + +/* + * Add span to the shared memory. Mutex should already have been acquired. + */ +static void +add_span_to_shared_buffer_locked(const Span * span) +{ + /* Spans must be ended before adding them to the shared buffer */ + Assert(span->ended); + if (shared_spans->end + 1 >= shared_spans->max) + pg_tracing->stats.dropped_spans++; + else + { + pg_tracing->stats.spans++; + shared_spans->spans[shared_spans->end++] = *span; + } +} + +/* + * Check if we still have available space in the shared spans. + * + * Between the moment we check and the moment we want to insert, the buffer + * may be full but we redo a check before appending the span. This is done + * early when starting a top span to bail out early if the buffer is + * already full since we don't immediately add the span in the shared buffer. 
+ */ +static bool +check_full_shared_spans() +{ + if (shared_spans->end + 1 >= shared_spans->max) + { + volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing; + bool full_buffer = false; + + SpinLockAcquire(&s->mutex); + if (pg_tracing_drop_full_buffer) + { + s->stats.dropped_spans += shared_spans->end; + shared_spans->end = 0; + s->extent = 0; + } + else + { + full_buffer = true; + s->stats.dropped_spans++; + } + SpinLockRelease(&s->mutex); + return full_buffer; + } + return false; +} + +/* + * Process a query descriptor: Gather all query instrumentation in the query + * span counters and generate span nodes from queryDesc planstate + */ +static void +process_query_desc(QueryDesc *queryDesc, int sql_error_code) +{ + MemoryContext oldcxt; + uint64 parent_id = per_level_buffers[exec_nested_level].executor_id; + NodeCounters *node_counters = &get_top_span_for_nested_level(exec_nested_level)->node_counters; + + oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); + /* Process total counters */ + if (queryDesc->totaltime) + { + InstrEndLoop(queryDesc->totaltime); + node_counters->buffer_usage = queryDesc->totaltime->bufusage; + node_counters->wal_usage = queryDesc->totaltime->walusage; + } + if (queryDesc->estate->es_jit) + node_counters->jit_usage = queryDesc->estate->es_jit->instr; + node_counters->rows = queryDesc->estate->es_total_processed; + + /* Process planstate */ + if (queryDesc->planstate) + { + Bitmapset *rels_used = NULL; + planstateTraceContext planstateTraceContext; + + planstateTraceContext.rtable_names = select_rtable_names_for_explain(queryDesc->plannedstmt->rtable, rels_used); + planstateTraceContext.trace = ¤t_trace; + planstateTraceContext.ancestors = NULL; + planstateTraceContext.sql_error_code = sql_error_code; + planstateTraceContext.deparse_ctx = deparse_context_for_plan_tree(queryDesc->plannedstmt, planstateTraceContext.rtable_names); + + generate_span_from_planstate(queryDesc->planstate, 
&planstateTraceContext, parent_id); + } + MemoryContextSwitchTo(oldcxt); +} + +/* + * If the query was started as a prepared statement, we won't be able to + * extract traceparent during query parsing since parsing was skipped. + * + * We assume that SQLCommenter content can be passed as a text in the + * first parameter + */ +static void +check_traceparameter_in_parameter(ParamListInfo params) +{ + Oid typoutput; + bool typisvarlena; + char *pstring; + ParamExternData param; + + if (params == NULL || params->numParams == 0) + return; + + param = params->params[0]; + /* Check first parameter type */ + if (param.ptype != TEXTOID) + return; + + /* Get the text of the first string parameter */ + getTypeOutputInfo(param.ptype, &typoutput, &typisvarlena); + pstring = OidOutputFunctionCall(typoutput, param.value); + current_trace = extract_traceparent(pstring, true); +} + +/* + * If trace was started from the global sample rate without + * a parent trace, we need to generate a random trace id + */ +static void +set_traceparent_trace_id(struct pgTracingTrace *traceparent_parameter) +{ + if (traceparent_parameter->trace_id != 0) + return; + + /* + * We leave parent_id to 0 as a way to indicate that this is a standalone + * trace. 
+ */ + Assert(traceparent_parameter->parent_id == 0); + traceparent_parameter->trace_id = pg_prng_uint64(&pg_global_prng_state); +} + +/* + * Decide whether a query should be sampled depending on the traceparent sampled + * and the provided sample rate configurations + */ +static bool +apply_sample_rate(struct pgTracingTrace const *traceparent_parameter) +{ + double rand; + bool sampled; + + /* Everything is sampled */ + if (pg_tracing_sample_rate >= 1.0) + return true; + + /* No SQLCommenter sampled and no global sample rate */ + if (traceparent_parameter->sampled == 0 && pg_tracing_sample_rate == 0.0) + return false; + + /* SQLCommenter sampled caller sample */ + if (traceparent_parameter->sampled == 1 && pg_tracing_caller_sample_rate >= 1.0) + return true; + + /* + * We have a non zero global sample rate or a sampled flag. Either way, we + * need a rand value + */ + rand = pg_prng_double(&pg_global_prng_state); + if (traceparent_parameter->sampled == 1) + /* Sampled flag case */ + sampled = (rand < pg_tracing_caller_sample_rate); + else + /* Global rate case */ + sampled = (rand < pg_tracing_sample_rate); + return sampled; +} + +/* + * Check whether the current query should be sampled or not + */ +static void +check_query_sampled(ParseState *pstate, const ParamListInfo params) +{ + TimestampTz statement_start_ts; + + /* Safety check... 
*/ + if (!pg_tracing || !pg_tracing_enabled(exec_nested_level)) + return; + + /* sampling already started */ + if (current_trace.sampled > 0) + return; + + /* Don't start tracing if we're not at the top */ + if (exec_nested_level > 0) + return; + + /* + * In a parallel worker, check the parallel context shared buffer to see + * if the leader left a trace context + */ + if (IsParallelWorker()) + { + fetch_parallel_context(¤t_trace); + return; + } + + Assert(max_nested_level = -1); + + current_trace.trace_id = 0; + current_trace.parent_id = 0; + if (params != NULL) + check_traceparameter_in_parameter(params); + else if (pstate != NULL) + current_trace = extract_traceparent(pstate->p_sourcetext, false); + + /* + * A statement can go through this function 3 times: post parsing, planner + * and executor run. If no sampling flag was extracted from SQLCommenter + * and we've already seen this statement (using statement_start ts), don't + * try to apply sample rate and exit. + */ + statement_start_ts = GetCurrentStatementStartTimestamp(); + if (current_trace.sampled == 0 && last_statement_check_for_sampling == statement_start_ts) + return; + last_statement_check_for_sampling = statement_start_ts; + + current_trace.sampled = apply_sample_rate(¤t_trace); + + if (current_trace.sampled > 0 && check_full_shared_spans()) + /* Buffer is full, abort sampling */ + current_trace.sampled = 0; + + if (current_trace.sampled > 0) + set_traceparent_trace_id(¤t_trace); +} + +/* + * Adjust spans' offsets to the provided file offset + */ +static void +adjust_file_offset(Span * span, Size file_position) +{ + if (span->name_offset != -1) + span->name_offset += file_position; + if (span->operation_name_offset != -1) + span->operation_name_offset += file_position; + if (span->parameter_offset != -1) + span->parameter_offset += file_position; +} + +/* + * Get the start time for a new top span + */ +static int64 +get_top_span_start(void) +{ + /* For the top span of parallel worker, we get the 
current ns */ + if (exec_nested_level == 0 && IsParallelWorker()) + return get_current_ns(); + /* For the root top span, use the trace start */ + if (exec_nested_level == 0) + { + Assert(current_trace.start_trace.ns > 0); + return current_trace.start_trace.ns; + } + + /* + * We're in a nested query, use set nested start. This is only set if + * we're inside a ProcessUtility. Other nested queries (like function + * call) will have their start/duration adjusted when queryDesc is + * processed. + */ + return nested_query_start_ns; +} + +/* + * Reset pg_tracing memory context and global state. + */ +static void +cleanup_tracing(void) +{ + if (current_trace.sampled == 0) + /* No need for cleaning */ + return; + remove_parallel_context(); + MemoryContextReset(pg_tracing_mem_ctx); + current_trace.sampled = 0; + current_trace.trace_id = 0; + current_trace.parent_id = 0; + max_nested_level = -1; + current_trace_spans = NULL; + per_level_buffers = NULL; +} + +/* + * End of the query tracing, dump all of top spans in the shared + * memory. This may happen either when query is finished or on a caught error. 
+ */ +static void +end_tracing(const int64 *end_span_ns) +{ + Size file_position = 0; + + /* We're still a nested query, tracing is not finished */ + if (exec_nested_level > 0) + return; + + /* Dump all buffered texts in file */ + text_store_file(pg_tracing, current_trace_text->data, current_trace_text->len, &file_position); + + for (int i = 0; i < current_trace_spans->end; i++) + adjust_file_offset(current_trace_spans->spans + i, file_position); + + { + volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing; + + SpinLockAcquire(&s->mutex); + /* We're at the end, add all spans to the shared memory */ + for (int i = 0; i < current_trace_spans->end; i++) + add_span_to_shared_buffer_locked(¤t_trace_spans->spans[i]); + SpinLockRelease(&s->mutex); + } + + /* We can reset the memory context here */ + cleanup_tracing(); +} + +/* + * When we catch an error (timeout, cancel query), we need to flag the ongoing + * span with an error, send current spans in the shared buffer and clean + * our memory context + */ +static void +handle_pg_error(int span_index, QueryDesc *queryDesc) +{ + Span *span; + Span *top_span; + int64 end_span_ns; + int sql_error_code; + + /* If we're not sampling the query, bail out */ + if (current_trace.sampled == 0 && !pg_tracing_enabled(exec_nested_level)) + return; + + if (span_index < 0) + return; + + sql_error_code = geterrcode(); + /* Assign the error code to the latest span */ + span = get_span_from_index(span_index); + span->sql_error_code = sql_error_code; + + /* Same for top span */ + top_span = get_top_span_for_nested_level(exec_nested_level); + top_span->sql_error_code = sql_error_code; + + /* + * Order matters there. 
We want to process query desc before getting the + * end time otherwise, the span nodes will have a higher duration than + * their parents + */ + if (queryDesc != NULL) + process_query_desc(queryDesc, sql_error_code); + + /* End current span and ongoing top spans */ + end_span_ns = get_current_ns(); + for (int i = 0; i <= exec_nested_level; i++) + end_top_span(i, &end_span_ns); + end_span(span, &end_span_ns); + end_tracing(&end_span_ns); +} + +/* + * Get the parent id for the given nested level + * + * nested_level can be negative when creating the top top span. + * In this case, we want to use the trace parent id. + */ +static uint64 +get_parent_id(int nested_level) +{ + Span *top_span; + + if (nested_level < 0) + return current_trace.parent_id; + Assert(nested_level <= allocated_nested_level); + top_span = get_top_span_for_nested_level(nested_level); + Assert(top_span->span_id > 0); + return top_span->span_id; +} + +/* + * If we enter a new nested level, initialize all necessary buffers + * and start_trace timer. + * + * The start of a top span can vary: prepared statement will skip parsing, the use of cached + * plans will skip the planner hook. + * Thus, a top span can start in either post parse, planner hook or executor run. + * + * Since this is called after the we've detected the start of a trace, we check for available + * space in the buffer. If the buffer is full, we abort tracing by setting sampled to false. + * Callers need to check that tracing was aborted. + */ +static bool +initialize_trace_level(void) +{ + /* Check if we've already created a top span for this nested level */ + if (exec_nested_level <= max_nested_level) + return false; + + /* First time */ + if (max_nested_level == -1) + { + MemoryContext oldcxt; + + Assert(pg_tracing_mem_ctx->isReset); + + /* + * We need to be able to pass informations that depend on the nested + * level. 
+ * + * executor span ids: Since an executor run becomes the parent span, + * we need subsequent created node spans to have the correct parent. + * + * query ids: used to propagate queryId to all spans in the same level + * + * top spans: We create one top span per nested level and those are + * only inserted in the shared buffer at the end + * + * parse spans: We don't know the parent of nested parse span until we + * process the plan node. We keep them in this buffer to be able to + * change their parent. + */ + oldcxt = MemoryContextSwitchTo(pg_tracing_mem_ctx); + /* initial allocation */ + allocated_nested_level = 1; + per_level_buffers = palloc0(allocated_nested_level * sizeof(pgTracingPerLevelBuffer)); + /* 0 is a valid index, initialise all indexes to -1 */ + for (int i = 0; i < allocated_nested_level; i++) + { + per_level_buffers[i].top_span_index = -1; + per_level_buffers[i].parse_span_index = -1; + } + current_trace_spans = palloc0(pg_tracing_initial_allocated_spans * sizeof(Span)); + current_trace_spans->max = pg_tracing_initial_allocated_spans; + current_trace_text = makeStringInfo(); + + MemoryContextSwitchTo(oldcxt); + + /* + * Parallel workers will get the referential timers from the leader + * parallel context + */ + if (!IsParallelWorker()) + { + /* Start referential timers */ + current_trace.start_trace.ts = GetCurrentStatementStartTimestamp(); + + /* + * We can't get the monotonic clock at the start of the statement + * so let's estimate it from here. 
+ */ + current_trace.start_trace.ns = get_current_ns() - (GetCurrentTimestamp() - current_trace.start_trace.ts) * NS_PER_US; + } + } + else if (exec_nested_level >= allocated_nested_level) + { + /* New nested level, allocate more memory */ + MemoryContext oldcxt; + int old_allocated_nested_level = allocated_nested_level; + + oldcxt = MemoryContextSwitchTo(pg_tracing_mem_ctx); + allocated_nested_level++; + per_level_buffers = repalloc0(per_level_buffers, old_allocated_nested_level * sizeof(pgTracingPerLevelBuffer), allocated_nested_level * sizeof(pgTracingPerLevelBuffer)); + for (int i = old_allocated_nested_level; i < allocated_nested_level; i++) + { + per_level_buffers[i].top_span_index = -1; + per_level_buffers[i].parse_span_index = -1; + } + MemoryContextSwitchTo(oldcxt); + } + + max_nested_level = exec_nested_level; + if (exec_nested_level == 0) + { + /* Start of a new trace */ + volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing; + + SpinLockAcquire(&s->mutex); + s->stats.traces++; + SpinLockRelease(&s->mutex); + } + return true; +} + +/* + * Add a new string to the current_trace_text stringinfo + */ +static int +add_str_to_trace_buffer(const char *str, int str_len) +{ + int position = current_trace_text->cursor; + + appendBinaryStringInfo(current_trace_text, str, str_len); + appendStringInfoChar(current_trace_text, '\0'); + current_trace_text->cursor = current_trace_text->len; + return position; +} + +/* + * Add the worker name to the current_trace_text stringinfo + */ +static int +add_worker_name_to_trace_buffer() +{ + int position = current_trace_text->cursor; + + appendStringInfo(current_trace_text, "Worker %d", ParallelWorkerNumber); + appendStringInfoChar(current_trace_text, '\0'); + current_trace_text->cursor = current_trace_text->len; + return position; +} + +/* + * Start a new top span if we've entered a new nested level + */ +static void +begin_top_span(Span * top_span, CmdType commandType, + const Query *query, const 
JumbleState *jstate, + const char *query_text, int64 start_time_ns) +{ + int query_len; + const char *normalised_query; + + /* in case of a cached plan, query might be unavailable */ + if (query != NULL) + per_level_buffers[exec_nested_level].query_id = query->queryId; + else if (current_trace.query_id > 0) + per_level_buffers[exec_nested_level].query_id = current_trace.query_id; + + begin_span(top_span, command_type_to_span_type(commandType), + ¤t_trace, get_parent_id(exec_nested_level - 1), per_level_buffers[exec_nested_level].query_id, &start_time_ns, exec_nested_level); + top_span->top_span = true; + + if (IsParallelWorker()) + { + /* + * In a parallel worker, we use the worker name as the span's + * operation + */ + top_span->operation_name_offset = add_worker_name_to_trace_buffer(); + return; + } + + if (jstate && jstate->clocations_count > 0 && query != NULL) + { + /* jstate is available, normalise query and extract parameters' values */ + char *paramStr; + + query_len = query->stmt_len; + normalised_query = normalise_query_parameters(jstate, query_text, + query->stmt_location, + &query_len, ¶mStr); + top_span->parameter_offset = add_str_to_trace_buffer(paramStr, strlen(paramStr)); + } + else + { + /* + * No jstate available, normalise query but we won't be able to + * extract parameters + */ + int stmt_location; + + if (query != NULL && query->stmt_len > 0) + { + query_len = query->stmt_len; + stmt_location = query->stmt_location; + } + else + { + query_len = strlen(query_text); + stmt_location = 0; + } + normalised_query = normalise_query(query_text, stmt_location, &query_len); + } + top_span->operation_name_offset = add_str_to_trace_buffer(normalised_query, query_len); +} + +/* + * Initialise buffer if we are in a new nested level and start top span. 
+ */ +static int64 +initialize_trace_level_and_top_span(CmdType commandType, Query *query, JumbleState *jstate, const char *query_text) +{ + bool new_nested_level = initialize_trace_level(); + int64 top_span_start_ns = get_top_span_start(); + Span *current_top = get_top_span_for_nested_level(exec_nested_level); + + /* current_trace_spans buffer should have been allocated */ + Assert(current_trace_spans != NULL); + if (new_nested_level) + { + begin_top_span(current_top, commandType, query, jstate, query_text, top_span_start_ns); + } + else if (current_top->ended) + { + /* + * It's possible to have multiple top spans within a nested query. If + * the previous top span was closed, start a new one + */ + int span_index = get_index_from_trace_spans(); + Span *span = get_span_from_index(span_index); + + set_top_span(exec_nested_level, span_index); + begin_top_span(span, commandType, query, jstate, query_text, top_span_start_ns); + } + return top_span_start_ns; +} + +/* + * Post-parse-analyze hook + * + * Tracing can be started here if: + * - The query has a SQLCommenter with traceparent parameter with sampled flag on and passes the caller_sample_rate + * - The query passes the sample_rate + * + * If query is sampled, create a post parse span and start the top span. + */ +static void +pg_tracing_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) +{ + Span *parse_span = NULL; + Span *post_parse_span; + int post_parse_index = -1; + int64 start_post_parse_ns; + int64 end_parse_ns; + uint64 parent_id; + int64 top_span_start_ns; + bool leave_parse_span_open = false; + + if (prev_post_parse_analyze_hook) + prev_post_parse_analyze_hook(pstate, query, jstate); + + /* + * In case there's an multi statement transaction using extended protocol, + * we will have the PREPARE of the next statement before we go through the + * ExecutorEnd of the previous statement. 
+ * + * For now, we skip parse span and will only start the next statement + * tracing during planner hook. + * + * TODO: Find a way to keep parse span for the next statement + */ + if (!pg_tracing_mem_ctx->isReset && exec_nested_level == 0) + { + return; + } + + /* If disabled, don't trace utility statement */ + if (query->utilityStmt && !pg_tracing_track_utility) + return; + + check_query_sampled(pstate, NULL); + + if (current_trace.sampled == 0) + /* Query is not sampled, nothing to do */ + return; + + /* + * We want to avoid calling get_ns at the start of post parse as it will + * impact all queries and we will only use it when the query is sampled. + */ + start_post_parse_ns = get_current_ns(); + end_parse_ns = start_post_parse_ns - 1000; + + /* + * Either we're inside a nested sampled query or we've parsed a query with + * the sampled flag, start a new level with a top span + */ + top_span_start_ns = initialize_trace_level_and_top_span(query->commandType, query, jstate, pstate->p_sourcetext); + + parent_id = get_parent_id(exec_nested_level); + Assert(parent_id > 0); + + if (top_span_start_ns == 0) + { + /* + * We are inside a nested query that wasn't started from + * ProcessUtility. We will rely on reparent span at the executor end + * to set the parse start and duration will be used to store the parse + * span end. + */ + top_span_start_ns = end_parse_ns; + leave_parse_span_open = true; + /* TODO: Handle this? */ + } + + if (!leave_parse_span_open) + { + /* + * We need to keep track of the parse span. Within nested queries, and + * outside of processUtility, we only know of the start of the parse + * span when we process planstate nodes at the end of the Executor. 
In + * this case, top_span_start_ns is set to 0 + */ + parse_span = get_parse_span_from_nested_level(exec_nested_level, true); + begin_span(parse_span, SPAN_PARSE, + ¤t_trace, parent_id, per_level_buffers[exec_nested_level].query_id, + &top_span_start_ns, exec_nested_level); + + /* + * In case of queries nested in a result, we don't have the parse + * start. We will need to update this span when processing the + * queryDesc. + */ + end_span(parse_span, &end_parse_ns); + } + + /* Post parse span */ + post_parse_index = get_index_from_trace_spans(); + post_parse_span = get_span_from_index(post_parse_index); + begin_span(post_parse_span, SPAN_POST_PARSE, + ¤t_trace, + parent_id, per_level_buffers[exec_nested_level].query_id, + &start_post_parse_ns, exec_nested_level); + if (parse_span != NULL && parse_span->start + parse_span->duration_ns / NS_PER_US >= post_parse_span->start) + + /* + * We want to avoid parse spans ending at the exact same microsend as + * the post parse span. Artificially increase post parse time if it's + * the case + */ + post_parse_span->start++; + end_span(post_parse_span, NULL); +} + +/* + * Planner hook + * + * Tracing can start here if the query skipped parsing (prepared query) and passes the random sample_rate. + * + * If query is sampled, create a plan span and start the top span if not already started. 
+ */ +static PlannedStmt * +pg_tracing_planner_hook(Query *query, + const char *query_string, + int cursorOptions, + ParamListInfo params) +{ + PlannedStmt *result; + int span_planner_index = -1; + Span *span_planner; + + check_query_sampled(NULL, params); + + /* No sampling */ + if (current_trace.sampled == 0 || !pg_tracing_enabled(plan_nested_level + exec_nested_level)) + { + if (prev_planner_hook) + result = prev_planner_hook(query, query_string, cursorOptions, + params); + else + result = standard_planner(query, query_string, cursorOptions, + params); + return result; + } + + /* + * We may have skipped parsing if statement was prepared, start a new top + * span if we don't have one + */ + if (current_trace.sampled == 1 && pg_tracing_enabled(plan_nested_level + exec_nested_level)) + initialize_trace_level_and_top_span(query->commandType, query, NULL, query_string); + + /* Start planner span */ + span_planner_index = get_index_from_trace_spans(); + span_planner = get_span_from_index(span_planner_index); + begin_span(span_planner, SPAN_PLANNER, ¤t_trace, + get_parent_id(exec_nested_level), per_level_buffers[exec_nested_level].query_id, NULL, exec_nested_level); + + plan_nested_level++; + PG_TRY(); + { + if (prev_planner_hook) + result = prev_planner_hook(query, query_string, cursorOptions, + params); + else + result = standard_planner(query, query_string, cursorOptions, + params); + } + PG_CATCH(); + { + plan_nested_level--; + handle_pg_error(span_planner_index, NULL); + PG_RE_THROW(); + } + PG_END_TRY(); + plan_nested_level--; + + /* End planner span */ + end_span_index(span_planner_index, NULL); + + /* If we have a prepared statement, add bound parameters to the top span */ + if (params != NULL) + { + char *paramStr = BuildParamLogString(params, + NULL, pg_tracing_max_parameter_str); + Span *current_top = get_top_span_for_nested_level(exec_nested_level); + + Assert(current_top->parameter_offset == -1); + if (paramStr != NULL) + current_top->parameter_offset = 
add_str_to_trace_buffer(paramStr, strlen(paramStr)); + } + return result; +} + +/* + * ExecutorStart hook: Activate query instrumentation if query is sampled + * + * Tracing can be started here if the query used a cached plan and passes the random sample_rate + * If query is sampled, start the top span if not already started. + */ +static void +pg_tracing_ExecutorStart(QueryDesc *queryDesc, int eflags) +{ + int executor_start_span_index = -1; + Span *executor_span; + int instrument_options = INSTRUMENT_TIMER | INSTRUMENT_ROWS; + + check_query_sampled(NULL, queryDesc->params); + + if (pg_tracing_instrument_buffers) + instrument_options |= INSTRUMENT_BUFFERS; + if (pg_tracing_instrument_wal) + instrument_options |= INSTRUMENT_WAL; + + if (current_trace.sampled > 0 && pg_tracing_enabled(exec_nested_level)) + { + /* + * In case of a cached plan, we haven't gone through neither parsing + * nor planner hook. + * + * Parallel workers will also start tracing from there. + */ + initialize_trace_level_and_top_span(queryDesc->operation, NULL, NULL, queryDesc->sourceText); + executor_start_span_index = get_index_from_trace_spans(); + executor_span = get_span_from_index(executor_start_span_index); + begin_span(executor_span, SPAN_EXECUTOR_START, ¤t_trace, get_parent_id(exec_nested_level), per_level_buffers[exec_nested_level].query_id, NULL, exec_nested_level); + + /* + * Activate query instrumentation. Timer and rows instrumentation are + * mandatory. 
+ */ + queryDesc->instrument_options = instrument_options; + } + + if (prev_ExecutorStart) + prev_ExecutorStart(queryDesc, eflags); + else + standard_ExecutorStart(queryDesc, eflags); + + if (executor_start_span_index != -1) + { + /* Allocate instrumentation */ + if (queryDesc->totaltime == NULL) + { + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); + queryDesc->totaltime = InstrAlloc(1, instrument_options, false); + queryDesc->planstate->instrument = InstrAlloc(1, instrument_options, false); + MemoryContextSwitchTo(oldcxt); + } + end_span_index(executor_start_span_index, NULL); + } +} + +/* + * ExecutorRun hook: track nesting depth and create executor run span. + * If the plan needs to create parallel workers, push the trace context in parallel shared buffer. + */ +static void +pg_tracing_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, + uint64 count, bool execute_once) +{ + int span_index = -1; + + if (current_trace.sampled > 0 && pg_tracing_enabled(exec_nested_level)) + { + Span *span; + + span_index = get_index_from_trace_spans(); + span = get_span_from_index(span_index); + begin_span(span, SPAN_EXECUTOR_RUN, ¤t_trace, get_parent_id(exec_nested_level), per_level_buffers[exec_nested_level].query_id, NULL, exec_nested_level); + + /* + * Executor run is used as the parent's span for nodes, store it's id + * in the per level buffer. 
+ */ + per_level_buffers[exec_nested_level].executor_id = span->span_id; + if (queryDesc->plannedstmt->parallelModeNeeded) + add_parallel_context(span->span_id, per_level_buffers[exec_nested_level].query_id); + } + + exec_nested_level++; + PG_TRY(); + { + if (prev_ExecutorRun) + prev_ExecutorRun(queryDesc, direction, count, execute_once); + else + standard_ExecutorRun(queryDesc, direction, count, execute_once); + } + PG_CATCH(); + { + exec_nested_level--; + handle_pg_error(span_index, queryDesc); + PG_RE_THROW(); + } + PG_END_TRY(); + exec_nested_level--; + + /* + * When a query is executing the pg_tracing_spans function, sampling will + * be disabled while we have an ongoing trace. Check for + * current_trace_spans + */ + if (current_trace_spans != NULL) + end_span_index(span_index, NULL); +} + +/* + * ExecutorFinish hook: create executor finish span and track nesting depth + */ +static void +pg_tracing_ExecutorFinish(QueryDesc *queryDesc) +{ + int span_index = -1; + Span *span = NULL; + + if (current_trace.sampled > 0 && pg_tracing_enabled(exec_nested_level)) + { + span_index = get_index_from_trace_spans(); + span = get_span_from_index(span_index); + begin_span(span, SPAN_EXECUTOR_FINISH, ¤t_trace, get_parent_id(exec_nested_level), per_level_buffers[exec_nested_level].query_id, NULL, exec_nested_level); + executor_finish_start_ts = span->start; + } + + exec_nested_level++; + PG_TRY(); + { + if (prev_ExecutorFinish) + prev_ExecutorFinish(queryDesc); + else + standard_ExecutorFinish(queryDesc); + } + PG_CATCH(); + { + exec_nested_level--; + handle_pg_error(span_index, queryDesc); + PG_RE_THROW(); + } + PG_END_TRY(); + exec_nested_level--; + + end_span_index(span_index, NULL); +} + +/* + * ExecutorEnd hook: process queryDesc and end tracing if we're at nested + * level 0 + */ +static void +pg_tracing_ExecutorEnd(QueryDesc *queryDesc) +{ + int span_index = -1; + + if (current_trace.sampled > 0 && pg_tracing_enabled(exec_nested_level)) + { + Span *span; + + span_index 
= get_index_from_trace_spans(); + span = get_span_from_index(span_index); + begin_span(span, SPAN_EXECUTOR_END, ¤t_trace, get_parent_id(exec_nested_level), per_level_buffers[exec_nested_level].query_id, + NULL, exec_nested_level); + + /* + * Executor finish can have a sub microsencond duration leading to + * both finish and end to start at the same timestamp. Artificially + * inscreate the start of the finish span if it's the case. + */ + if (span->start == executor_finish_start_ts) + span->start++; + + /* Query finished normally, send 0 as error code */ + process_query_desc(queryDesc, 0); + } + + if (prev_ExecutorEnd) + prev_ExecutorEnd(queryDesc); + else + standard_ExecutorEnd(queryDesc); + + if (span_index > -1) + { + int64 end_span_ns = get_current_ns(); + int64 end_child_span_ns = end_span_ns - 1000; + + if (exec_nested_level == 0) + end_top_span(exec_nested_level, &end_span_ns); + /* End the matching top span */ + end_span_index(span_index, &end_child_span_ns); + end_tracing(&end_span_ns); + } +} + +/* + * With nested queries, the post parse hook will be called before we know + * the top span and parse span parent. We will only know and create their + * parent when we generate span node from planstate. + * + * When those nodes are created, we attach the parse spans and top spans to them and + * adjust their start/duration. 
+ */ +static void +reparent_child_query(Span const *parent_span, int64 start_span_ns) +{ + Span *child_parse_span; + Span *child_top_span; + + if (exec_nested_level + 1 > max_nested_level) + /* No child to reparent */ + return; + + /* Use the same start/duration as the parent */ + child_top_span = get_top_span_for_nested_level(exec_nested_level + 1); + Assert(child_top_span->ended == false); + child_top_span->parent_id = parent_span->span_id; + child_top_span->start = parent_span->start; + child_top_span->duration_ns = parent_span->duration_ns; + child_top_span->ended = true; + + if (per_level_buffers[exec_nested_level + 1].parse_span_index == -1) + /* No parse span started */ + return; + + child_parse_span = get_parse_span_from_nested_level(exec_nested_level + 1, false); + if (child_parse_span->trace_id > 0) + { + int64 end_parse_span_ns; + + /* + * We've stored the end ns of the parse span in the duration_ns field. + * We can match the parse start with it's parent and compute the + * duration with the elements we have. + */ + end_parse_span_ns = child_parse_span->duration_ns; + child_parse_span->start = parent_span->start; + child_parse_span->duration_ns = start_span_ns; + end_span(child_parse_span, &end_parse_span_ns); + Assert(child_parse_span->duration_ns > 0); + } +} + +/* + * ProcessUtility hook + * + * Trace utility query if utility tracking is enabled and sampling was enabled + * during parse step. 
+ * + * Explain statements are skipped + */ +static void +pg_tracing_ProcessUtility(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, + ProcessUtilityContext context, + ParamListInfo params, QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc) +{ + Span *process_utility_span; + int process_utility_span_index = -1; + int old_top_span_index = -1; + int64 end_span_ns; + int64 end_child_span_ns; + int64 start_span_ns; + + if (!pg_tracing_track_utility || current_trace.sampled == 0 || !pg_tracing_enabled(exec_nested_level)) + { + if (prev_ProcessUtility) + prev_ProcessUtility(pstmt, queryString, readOnlyTree, + context, params, queryEnv, + dest, qc); + else + standard_ProcessUtility(pstmt, queryString, readOnlyTree, + context, params, queryEnv, + dest, qc); + + /* + * Tracing may have been started within the utility call without going + * through ExecutorEnd (ex: prepare statement). Check and end tracing + * here in this case. + */ + if (!pg_tracing_mem_ctx->isReset) + { + Assert(current_trace.sampled); + Assert(exec_nested_level == 0); + end_tracing(NULL); + } + return; + } + + start_span_ns = get_current_ns(); + + /* + * Set a possible start for nested query. ProcessUtility may start a + * nested query and we can't use the statement start like we usually do. + * We artificially start nested a bit later to avoid overlapping spans due + * to rounding + */ + nested_query_start_ns = start_span_ns + 1000; + + /* Build the process utility span. 
*/ + process_utility_span_index = get_index_from_trace_spans(); + process_utility_span = get_span_from_index(process_utility_span_index); + begin_span(process_utility_span, SPAN_PROCESS_UTILITY, ¤t_trace, + get_parent_id(exec_nested_level), per_level_buffers[exec_nested_level].query_id, &start_span_ns, exec_nested_level); + /* Extract and normalise the query */ + if (queryString != NULL && pstmt->stmt_location != -1 && pstmt->stmt_len > 0) + { + int query_len = pstmt->stmt_len; + const char *normalised_query = normalise_query(queryString, pstmt->stmt_location, &query_len); + + process_utility_span->operation_name_offset = add_str_to_trace_buffer(normalised_query, query_len); + } + + /* + * Set ProcessUtility span as the new top span just in case we have nested + * query. We need to keep the old top span index to be able to close it. + */ + old_top_span_index = per_level_buffers[exec_nested_level].top_span_index; + set_top_span(exec_nested_level, process_utility_span_index); + + exec_nested_level++; + PG_TRY(); + { + if (prev_ProcessUtility) + prev_ProcessUtility(pstmt, queryString, readOnlyTree, + context, params, queryEnv, + dest, qc); + else + standard_ProcessUtility(pstmt, queryString, readOnlyTree, + context, params, queryEnv, + dest, qc); + } + PG_CATCH(); + { + exec_nested_level--; + handle_pg_error(process_utility_span_index, NULL); + PG_RE_THROW(); + } + PG_END_TRY(); + end_span_ns = get_current_ns(); + + /* ProcessUtility may have created a top span, close it here. 
*/ + if (exec_nested_level <= max_nested_level && per_level_buffers[exec_nested_level].top_span_index != -1) + { + Span *top_span = get_top_span_for_nested_level(exec_nested_level); + + if (top_span->ended == false) + end_top_span(exec_nested_level, &end_span_ns); + } + exec_nested_level--; + + /* buffer may have been repalloced */ + process_utility_span = get_span_from_index(process_utility_span_index); + if (qc != NULL) + process_utility_span->node_counters.rows = qc->nprocessed; + /* ProcessUtility is the current top span, no need to call end_top_span */ + end_child_span_ns = end_span_ns - 1000; + end_span(process_utility_span, &end_child_span_ns); + + /* Also end the old top span */ + end_span_index(old_top_span_index, &end_span_ns); + end_tracing(&end_span_ns); + + /* + * We may iterate through a list of nested ProcessUtility calls, update + * the nested query start. + */ + nested_query_start_ns = end_span_ns + 1000; +} + +/* + * Iterate over a list of planstate to generate span node + */ +static void +generate_member_nodes(PlanState **planstates, int nplans, planstateTraceContext * planstateTraceContext, uint64 parent_id) +{ + int j; + + for (j = 0; j < nplans; j++) + generate_span_from_planstate(planstates[j], planstateTraceContext, parent_id); +} + +/* + * Iterate over custom scan planstates to generate span node + */ +static void +generate_span_from_custom_scan(CustomScanState *css, planstateTraceContext * planstateTraceContext, uint64 parent_id) +{ + ListCell *cell; + + foreach(cell, css->custom_ps) + generate_span_from_planstate((PlanState *) lfirst(cell), planstateTraceContext, parent_id); +} + +/* + * Create span node for the provided planstate + */ +static uint64 +create_span_node(PlanState *planstate, planstateTraceContext * planstateTraceContext, uint64 parent_id, + SpanType span_type, char *subplan_name) +{ + Span *span; + int span_index = -1; + char const *span_name; + char const *operation_name; + Plan const *plan = planstate->plan; + int64 
node_duration_ns; + int64 start_span_ns; + + /* + * Make sure stats accumulation is done. Function is a no-op if if was + * already done. + */ + InstrEndLoop(planstate->instrument); + + /* We only create span node on node that were executed */ + Assert(!INSTR_TIME_IS_ZERO(planstate->instrument->firsttime)); + + start_span_ns = INSTR_TIME_GET_NANOSEC(planstate->instrument->firsttime); + span_index = get_index_from_trace_spans(); + + span = get_span_from_index(span_index); + begin_span(span, span_type, planstateTraceContext->trace, parent_id, per_level_buffers[exec_nested_level].query_id, + &start_span_ns, exec_nested_level); + + /* first tuple time */ + span->startup = planstate->instrument->startup * NS_PER_S; + + if (span_type == SPAN_NODE) + { + /* Generate names and store them */ + span_name = plan_to_span_name(plan); + operation_name = plan_to_operation(planstateTraceContext, planstate, span_name); + + span->name_offset = add_str_to_trace_buffer(span_name, strlen(span_name)); + span->operation_name_offset = add_str_to_trace_buffer(operation_name, strlen(operation_name)); + } + else if (subplan_name != NULL) + { + span->operation_name_offset = add_str_to_trace_buffer(subplan_name, strlen(subplan_name)); + } + + span->node_counters.rows = (int64) planstate->instrument->ntuples / planstate->instrument->nloops; + span->node_counters.nloops = (int64) planstate->instrument->nloops; + span->node_counters.buffer_usage = planstate->instrument->bufusage; + span->node_counters.wal_usage = planstate->instrument->walusage; + + span->plan_counters.startup_cost = plan->startup_cost; + span->plan_counters.total_cost = plan->total_cost; + span->plan_counters.plan_rows = plan->plan_rows; + span->plan_counters.plan_width = plan->plan_width; + + if (!planstate->state->es_finished) + { + /* + * We're processing this node in an error handler, stop the node + * instrumentation to get it's current state + */ + InstrStopNode(planstate->instrument, planstate->state->es_processed); + 
InstrEndLoop(planstate->instrument); + span->sql_error_code = planstateTraceContext->sql_error_code; + } + node_duration_ns = INSTR_TIME_GET_NANOSEC(planstate->instrument->firsttime); + Assert(planstate->instrument->total > 0); + node_duration_ns += planstate->instrument->total * NS_PER_S; + end_span(span, &node_duration_ns); + + /* + * If we have a Result node, make it the span parent of the next query + * span if we have any + * + * TODO: Do we have other node that could be the base for nested queries? + */ + if (nodeTag(plan) == T_Result) + reparent_child_query(span, start_span_ns); + return span->span_id; +} + +/* + * Walk through the planstate tree generating a node span for each node. + * We pass possible error code to tag unfinished node with it + */ +static void +generate_span_from_planstate(PlanState *planstate, planstateTraceContext * planstateTraceContext, uint64 parent_id) +{ + ListCell *l; + Plan const *plan = planstate->plan; + uint64 span_id; + + /* The node was never executed, skip it */ + if (INSTR_TIME_IS_ZERO(planstate->instrument->firsttime)) + return; + + span_id = create_span_node(planstate, planstateTraceContext, parent_id, SPAN_NODE, NULL); + planstateTraceContext->ancestors = lcons(planstate->plan, planstateTraceContext->ancestors); + + /* Walk the planstate tree */ + if (planstate->lefttree) + generate_span_from_planstate(planstate->lefttree, planstateTraceContext, span_id); + if (planstate->righttree) + generate_span_from_planstate(planstate->righttree, planstateTraceContext, span_id); + + /* Handle init plans and subplans */ + foreach(l, planstate->initPlan) + { + SubPlanState *sstate = (SubPlanState *) lfirst(l); + PlanState *splan = sstate->planstate; + uint64_t init_span_id; + + if (INSTR_TIME_IS_ZERO(splan->instrument->firsttime)) + continue; + init_span_id = create_span_node(splan, planstateTraceContext, parent_id, SPAN_NODE_INIT_PLAN, sstate->subplan->plan_name); + generate_span_from_planstate(splan, planstateTraceContext, 
init_span_id); + } + + foreach(l, planstate->subPlan) + { + SubPlanState *sstate = (SubPlanState *) lfirst(l); + PlanState *splan = sstate->planstate; + uint64_t subplan_span_id; + + if (INSTR_TIME_IS_ZERO(splan->instrument->firsttime)) + continue; + subplan_span_id = create_span_node(splan, planstateTraceContext, parent_id, SPAN_NODE_SUBPLAN, sstate->subplan->plan_name); + generate_span_from_planstate(splan, planstateTraceContext, subplan_span_id); + } + + /* Handle special nodes with children nodes */ + switch (nodeTag(plan)) + { + case T_Append: + generate_member_nodes(((AppendState *) planstate)->appendplans, + ((AppendState *) planstate)->as_nplans, planstateTraceContext, span_id); + break; + case T_MergeAppend: + generate_member_nodes(((MergeAppendState *) planstate)->mergeplans, + ((MergeAppendState *) planstate)->ms_nplans, planstateTraceContext, span_id); + break; + case T_BitmapAnd: + generate_member_nodes(((BitmapAndState *) planstate)->bitmapplans, + ((BitmapAndState *) planstate)->nplans, planstateTraceContext, span_id); + break; + case T_BitmapOr: + generate_member_nodes(((BitmapOrState *) planstate)->bitmapplans, + ((BitmapOrState *) planstate)->nplans, planstateTraceContext, span_id); + break; + case T_SubqueryScan: + generate_span_from_planstate(((SubqueryScanState *) planstate)->subplan, planstateTraceContext, span_id); + break; + case T_CustomScan: + generate_span_from_custom_scan((CustomScanState *) planstate, planstateTraceContext, span_id); + break; + default: + break; + } + planstateTraceContext->ancestors = list_delete_first(planstateTraceContext->ancestors); +} + +/* + * Add plan counters to Datum output + */ +static int +add_plan_counters(const PlanCounters * plan_counters, int i, Datum *values) +{ + values[i++] = Float8GetDatumFast(plan_counters->startup_cost); + values[i++] = Float8GetDatumFast(plan_counters->total_cost); + values[i++] = Float8GetDatumFast(plan_counters->plan_rows); + values[i++] = 
Int32GetDatum(plan_counters->plan_width); + return i; +} + +/* + * Add node counters to Datum output + */ +static int +add_node_counters(const NodeCounters * node_counters, int i, Datum *values) +{ + Datum wal_bytes; + char buf[256]; + + values[i++] = Int64GetDatumFast(node_counters->rows); + values[i++] = Int64GetDatumFast(node_counters->nloops); + + /* Buffer usage */ + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.shared_blks_hit); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.shared_blks_read); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.shared_blks_dirtied); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.shared_blks_written); + + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.local_blks_hit); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.local_blks_read); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.local_blks_dirtied); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.local_blks_written); + + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->buffer_usage.blk_read_time)); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->buffer_usage.blk_write_time)); + + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.temp_blks_read); + values[i++] = Int64GetDatumFast(node_counters->buffer_usage.temp_blks_written); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->buffer_usage.temp_blk_read_time)); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->buffer_usage.temp_blk_write_time)); + + /* WAL usage */ + values[i++] = Int64GetDatumFast(node_counters->wal_usage.wal_records); + values[i++] = Int64GetDatumFast(node_counters->wal_usage.wal_fpi); + snprintf(buf, sizeof buf, UINT64_FORMAT, node_counters->wal_usage.wal_bytes); + + /* Convert to numeric. 
*/ + wal_bytes = DirectFunctionCall3(numeric_in, + CStringGetDatum(buf), + ObjectIdGetDatum(0), + Int32GetDatum(-1)); + values[i++] = wal_bytes; + + /* JIT usage */ + values[i++] = Int8GetDatum(node_counters->jit_usage.created_functions); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->jit_usage.generation_counter)); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->jit_usage.inlining_counter)); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->jit_usage.optimization_counter)); + values[i++] = Float8GetDatumFast(INSTR_TIME_GET_MILLISEC(node_counters->jit_usage.emission_counter)); + + return i; +} + +/* + * Build the tuple for a Span and add it to the output + */ +static void +add_result_span(ReturnSetInfo *rsinfo, Span * span, + const char *qbuffer, Size qbuffer_size, Relation rel) +{ + Datum values[PG_TRACING_TRACES_COLS] = {0}; + bool nulls[PG_TRACING_TRACES_COLS] = {0}; + + int i = 0; + + values[i++] = UInt64GetDatum(span->trace_id); + values[i++] = UInt64GetDatum(span->parent_id); + values[i++] = UInt64GetDatum(span->span_id); + values[i++] = UInt64GetDatum(span->query_id); + values[i++] = CStringGetTextDatum(get_span_name(span, qbuffer, qbuffer_size)); + values[i++] = CStringGetTextDatum(get_operation_name(span, qbuffer, qbuffer_size)); + values[i++] = Int64GetDatum(span->start); + + values[i++] = Int64GetDatum(span->duration_ns); + values[i++] = CStringGetTextDatum(unpack_sql_state(span->sql_error_code)); + values[i++] = UInt32GetDatum(span->be_pid); + values[i++] = UInt8GetDatum(span->nested_level); + values[i++] = UInt8GetDatum(span->subxact_count); + values[i++] = BoolGetDatum(span->top_span); + + if ((span->type >= SPAN_NODE && span->type <= SPAN_NODE_UNKNOWN) + || span->type == SPAN_PLANNER) + { + i = add_plan_counters(&span->plan_counters, i, values); + i = add_node_counters(&span->node_counters, i, values); + + values[i++] = Int64GetDatum(span->startup); + if 
(span->parameter_offset != -1) + values[i++] = CStringGetTextDatum(qbuffer + span->parameter_offset); + } + + for (int j = i; j < PG_TRACING_TRACES_COLS; j++) + nulls[j] = 1; + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); +} + +/* + * Return spans as a result set. + * + * Accept a consume parameter. When consume is set, + * we empty the shared buffer and query text. + */ +Datum +pg_tracing_spans(PG_FUNCTION_ARGS) +{ + bool consume; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Span *span; + const char *qbuffer; + Size qbuffer_size; + + consume = PG_GETARG_BOOL(0); + if (!pg_tracing) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_tracing must be loaded via shared_preload_libraries"))); + InitMaterializedSRF(fcinfo, 0); + + /* + * If this query was sampled and we're consuming tracing_spans buffer, the + * spans will target a query string that doesn't exist anymore in the + * query file. + * + * Better abort the sampling, clean ongoing traces and remove any possible + * parallel context pushed + * + * Since this will be called within an ExecutorRun, we will need to check + * for current_trace_spans later. 
+ */ + cleanup_tracing(); + + qbuffer = qtext_load_file(&qbuffer_size); + if (!qbuffer) + { + qbuffer = palloc(0); + qbuffer_size = 0; + } + + { + volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing; + + SpinLockAcquire(&s->mutex); + for (int i = 0; i < shared_spans->end; i++) + { + span = shared_spans->spans + i; + add_result_span(rsinfo, span, qbuffer, qbuffer_size, NULL); + } + + /* Consume is set, remove spans from the shared buffer */ + if (consume) + { + shared_spans->end = 0; + /* We only truncate query file if there's no active writers */ + if (s->n_writers == 0) + { + s->extent = 0; + pg_truncate(PG_TRACING_TEXT_FILE, 0); + } + else + s->stats.failed_truncates++; + } + s->stats.last_consume = GetCurrentTimestamp(); + SpinLockRelease(&s->mutex); + } + return (Datum) 0; +} + +/* + * Return statistics of pg_tracing. + */ +Datum +pg_tracing_info(PG_FUNCTION_ARGS) +{ + pgTracingGlobalStats stats; + TupleDesc tupdesc; + Datum values[PG_TRACING_INFO_COLS] = {0}; + bool nulls[PG_TRACING_INFO_COLS] = {0}; + int i = 0; + + if (!pg_tracing) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_tracing must be loaded via shared_preload_libraries"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Get a copy of the pg_tracing stats */ + { + volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing; + + SpinLockAcquire(&s->mutex); + stats = s->stats; + SpinLockRelease(&s->mutex); + } + + values[i++] = Int64GetDatum(stats.traces); + values[i++] = Int64GetDatum(stats.spans); + values[i++] = Int64GetDatum(stats.dropped_spans); + values[i++] = TimestampTzGetDatum(stats.last_consume); + values[i++] = TimestampTzGetDatum(stats.stats_reset); + values[i++] = Float8GetDatum(pg_tracing_sample_rate); + + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, 
values, nulls))); +} + +/* + * Reset pg_tracing statistics. + */ +Datum +pg_tracing_reset(PG_FUNCTION_ARGS) +{ + /* + * Reset global statistics for pg_tracing since all entries are removed. + */ + { + volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing; + + SpinLockAcquire(&s->mutex); + s->stats.traces = 0; + s->stats.spans = 0; + s->stats.dropped_spans = 0; + s->stats.failed_truncates = 0; + s->stats.last_consume = 0; + s->stats.stats_reset = GetCurrentTimestamp(); + SpinLockRelease(&s->mutex); + } + PG_RETURN_VOID(); +} diff --git a/contrib/pg_tracing/pg_tracing.conf b/contrib/pg_tracing/pg_tracing.conf new file mode 100644 index 0000000000..ad6b5e7c1d --- /dev/null +++ b/contrib/pg_tracing/pg_tracing.conf @@ -0,0 +1 @@ +shared_preload_libraries = 'pg_tracing' diff --git a/contrib/pg_tracing/pg_tracing.control b/contrib/pg_tracing/pg_tracing.control new file mode 100644 index 0000000000..efa78a6610 --- /dev/null +++ b/contrib/pg_tracing/pg_tracing.control @@ -0,0 +1,5 @@ +# pg_tracing extension +comment = 'Distributed tracing for postgres' +default_version = '1.0' +module_pathname = '$libdir/pg_tracing' +relocatable = true diff --git a/contrib/pg_tracing/pg_tracing.h b/contrib/pg_tracing/pg_tracing.h new file mode 100644 index 0000000000..4a7b041df4 --- /dev/null +++ b/contrib/pg_tracing/pg_tracing.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * pg_tracing.h + * + * IDENTIFICATION + * contrib/pg_tracing/pg_tracing.h + *------------------------------------------------------------------------- + */ +#ifndef _PG_TRACING_H_ +#define _PG_TRACING_H_ + +#include "postgres.h" +#include "span.h" + +#include "storage/s_lock.h" + +/* + * Global statistics for pg_tracing + */ +typedef struct pgTracingGlobalStats +{ + int64 traces; /* number of traces processed */ + int64 spans; /* number of spans processed */ + int64 dropped_spans; /* number of dropped spans due to full buffer */ + int64 
failed_truncates; /* number of failed query file truncates + * due to active writer */ + TimestampTz last_consume; /* Last time the shared spans buffer was + * consumed */ + TimestampTz stats_reset; /* Last time stats were reset */ +} pgTracingGlobalStats; + +/* + * Global shared state + */ +typedef struct pgTracingSharedState +{ + slock_t mutex; /* protects shared stats fields and + * shared_spans buffer */ + Size extent; /* current extent of query file */ + int n_writers; /* number of active writers to query file */ + pgTracingGlobalStats stats; /* global statistics for pg_tracing */ +} pgTracingSharedState; + +#endif diff --git a/contrib/pg_tracing/pg_tracing_explain.c b/contrib/pg_tracing/pg_tracing_explain.c new file mode 100644 index 0000000000..7f7155d2d3 --- /dev/null +++ b/contrib/pg_tracing/pg_tracing_explain.c @@ -0,0 +1,527 @@ +/*------------------------------------------------------------------------- + * + * pg_tracing_explain.c + * + * + * Copyright (c) 2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_tracing/pg_tracing_explain.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pg_tracing_explain.h" + +#include "nodes/makefuncs.h" +#include "utils/builtins.h" +#include "parser/parsetree.h" +#include "nodes/extensible.h" +#include "utils/lsyscache.h" +#include "utils/ruleutils.h" + + +static void +ExplainTargetRel(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, Index rti, StringInfo str) +{ + char *objectname = NULL; + char *namespace = NULL; + RangeTblEntry *rte; + char *refname; + + List *rtable; + + rtable = planstate->state->es_range_table; + rte = rt_fetch(rti, rtable); + + refname = (char *) list_nth(planstateTraceContext->rtable_names, rti - 1); + if (refname == NULL) + refname = rte->eref->aliasname; + + switch (nodeTag(planstate->plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_IndexScan: + case 
T_IndexOnlyScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_ForeignScan: + case T_CustomScan: + case T_ModifyTable: + /* Assert it's on a real relation */ + Assert(rte->rtekind == RTE_RELATION); + objectname = get_rel_name(rte->relid); + break; + case T_FunctionScan: + { + FunctionScan *fscan = (FunctionScan *) planstate->plan; + + /* Assert it's on a RangeFunction */ + Assert(rte->rtekind == RTE_FUNCTION); + + /* + * If the expression is still a function call of a single + * function, we can get the real name of the function. + * Otherwise, punt. (Even if it was a single function call + * originally, the optimizer could have simplified it away.) + */ + if (list_length(fscan->functions) == 1) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(fscan->functions); + + if (IsA(rtfunc->funcexpr, FuncExpr)) + { + FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr; + Oid funcid = funcexpr->funcid; + + objectname = get_func_name(funcid); + } + } + } + break; + case T_TableFuncScan: + Assert(rte->rtekind == RTE_TABLEFUNC); + objectname = "xmltable"; + break; + case T_ValuesScan: + Assert(rte->rtekind == RTE_VALUES); + break; + case T_CteScan: + /* Assert it's on a non-self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(!rte->self_reference); + objectname = rte->ctename; + break; + case T_NamedTuplestoreScan: + Assert(rte->rtekind == RTE_NAMEDTUPLESTORE); + objectname = rte->enrname; + break; + case T_WorkTableScan: + /* Assert it's on a self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(rte->self_reference); + objectname = rte->ctename; + break; + default: + break; + } + + appendStringInfoString(str, " on"); + if (namespace != NULL) + appendStringInfo(str, " %s.%s", quote_identifier(namespace), + quote_identifier(objectname)); + else if (objectname != NULL) + appendStringInfo(str, " %s", quote_identifier(objectname)); + if (objectname == NULL || strcmp(refname, objectname) != 0) + appendStringInfo(str, 
" %s", quote_identifier(refname)); +} + + +static void +ExplainScanTarget(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const Scan *plan, StringInfo str) +{ + ExplainTargetRel(planstateTraceContext, planstate, plan->scanrelid, str); +} + +static void +ExplainModifyTarget(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const ModifyTable *plan, StringInfo str) +{ + ExplainTargetRel(planstateTraceContext, planstate, plan->nominalRelation, str); +} + +static void +ExplainIndexScanDetails(Oid indexId, ScanDirection indexorderdir, + StringInfo str) +{ + const char *indexname = get_rel_name(indexId); + + if (ScanDirectionIsBackward(indexorderdir)) + appendStringInfoString(str, " Backward"); + appendStringInfo(str, " using %s", quote_identifier(indexname)); +} + +static void +plan_to_rel_name(const PlanState *planstate, const planstateTraceContext * planstateTraceContext, StringInfo str) +{ + const Plan *plan = planstate->plan; + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) plan, str); + break; + case T_ForeignScan: + case T_CustomScan: + if (((Scan *) plan)->scanrelid > 0) + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) plan, str); + break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + str); + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) indexscan, str); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + str); + ExplainScanTarget(planstateTraceContext, planstate, 
(Scan *) indexonlyscan, str); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + const char *indexname = get_rel_name(bitmapindexscan->indexid); + + appendStringInfo(str, " on %s", + quote_identifier(indexname)); + } + break; + case T_ModifyTable: + ExplainModifyTarget(planstateTraceContext, planstate, (ModifyTable *) plan, str); + break; + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *jointype; + + switch (((Join *) plan)->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + case JOIN_RIGHT_ANTI: + jointype = "Right Anti"; + break; + default: + jointype = "???"; + break; + } + if (((Join *) plan)->jointype != JOIN_INNER) + appendStringInfo(str, " %s Join", jointype); + else if (!IsA(plan, NestLoop)) + appendStringInfoString(str, " Join"); + } + break; + case T_SetOp: + { + const char *setopcmd; + + switch (((SetOp *) plan)->cmd) + { + case SETOPCMD_INTERSECT: + setopcmd = "Intersect"; + break; + case SETOPCMD_INTERSECT_ALL: + setopcmd = "Intersect All"; + break; + case SETOPCMD_EXCEPT: + setopcmd = "Except"; + break; + case SETOPCMD_EXCEPT_ALL: + setopcmd = "Except All"; + break; + default: + setopcmd = "???"; + break; + } + appendStringInfo(str, " %s", setopcmd); + } + break; + default: + break; + } +} + +static void +add_scan_qual(StringInfo str, const PlanState *planstate, List *qual, + const char *qlabel, List *ancestors, List *deparse_ctx, + bool useprefix) +{ + List *context; + char *exprstr; + Node *node; + + if (qual == NIL) + return; + + node = (Node *) make_ands_explicit(qual); + + /* Set up deparsing context */ + context = set_deparse_context_plan(deparse_ctx, + planstate->plan, + ancestors); + + /* Deparse the 
expression */ + appendStringInfoString(str, "|"); + exprstr = deparse_expression(node, context, useprefix, false); + appendStringInfoString(str, qlabel); + appendStringInfoString(str, exprstr); +} + +char const * +plan_to_operation(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const char *spanName) +{ + StringInfo operation_name = makeStringInfo(); + Plan const *plan = planstate->plan; + List *ancestors = planstateTraceContext->ancestors; + List *deparse_ctx = planstateTraceContext->deparse_ctx; + + if (plan->parallel_aware) + appendStringInfoString(operation_name, "Parallel "); + if (plan->async_capable) + appendStringInfoString(operation_name, "Async "); + appendStringInfoString(operation_name, spanName); + + plan_to_rel_name(planstate, planstateTraceContext, operation_name); + + switch (nodeTag(plan)) + { + case T_IndexScan: + add_scan_qual(operation_name, planstate, ((IndexScan *) plan)->indexqualorig, "Index Cond: ", + ancestors, deparse_ctx, false); + break; + case T_IndexOnlyScan: + add_scan_qual(operation_name, planstate, ((IndexOnlyScan *) plan)->indexqual, "Index Cond: ", + ancestors, deparse_ctx, false); + break; + case T_BitmapIndexScan: + add_scan_qual(operation_name, planstate, ((BitmapIndexScan *) plan)->indexqualorig, "Index Cond: ", + ancestors, deparse_ctx, false); + break; + case T_BitmapHeapScan: + add_scan_qual(operation_name, planstate, ((BitmapHeapScan *) plan)->bitmapqualorig, "Recheck Cond: ", + ancestors, deparse_ctx, false); + break; + case T_SeqScan: + case T_ValuesScan: + case T_CteScan: + case T_NamedTuplestoreScan: + case T_WorkTableScan: + case T_SubqueryScan: + case T_Gather: + case T_GatherMerge: + case T_FunctionScan: + case T_TableFuncScan: + case T_TidRangeScan: + case T_ForeignScan: + case T_CustomScan: + case T_Agg: + case T_WindowAgg: + case T_Group: + case T_Result: + add_scan_qual(operation_name, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + case 
T_NestLoop: + add_scan_qual(operation_name, planstate, ((NestLoop *) plan)->join.joinqual, "Join Filter : ", + ancestors, deparse_ctx, false); + add_scan_qual(operation_name, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + case T_MergeJoin: + add_scan_qual(operation_name, planstate, ((MergeJoin *) plan)->mergeclauses, "Merge Cond: ", + ancestors, deparse_ctx, false); + add_scan_qual(operation_name, planstate, ((MergeJoin *) plan)->join.joinqual, "Join Filter: ", + ancestors, deparse_ctx, false); + add_scan_qual(operation_name, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + case T_HashJoin: + add_scan_qual(operation_name, planstate, ((HashJoin *) plan)->hashclauses, "Hash Cond: ", + ancestors, deparse_ctx, false); + add_scan_qual(operation_name, planstate, ((MergeJoin *) plan)->join.joinqual, "Join Filter: ", + ancestors, deparse_ctx, false); + add_scan_qual(operation_name, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + default: + break; + } + + return operation_name->data; +} + +const char * +plan_to_span_name(const Plan *plan) +{ + const char *custom_name; + + switch (nodeTag(plan)) + { + case T_Result: + return "Result"; + case T_ProjectSet: + return "ProjectSet"; + case T_ModifyTable: + switch (((ModifyTable *) plan)->operation) + { + case CMD_INSERT: + return "Insert"; + case CMD_UPDATE: + return "Update"; + case CMD_DELETE: + return "Delete"; + case CMD_MERGE: + return "Merge"; + default: + return "???"; + } + case T_Append: + return "Append"; + case T_MergeAppend: + return "MergeAppend"; + case T_RecursiveUnion: + return "RecursiveUnion"; + case T_BitmapAnd: + return "BitmapAnd"; + case T_BitmapOr: + return "BitmapOr"; + case T_NestLoop: + return "NestedLoop"; + case T_MergeJoin: + return "Merge"; /* "Join" gets added by jointype switch */ + case T_HashJoin: + return "Hash"; /* "Join" gets added by jointype switch */ + case T_SeqScan: + return "SeqScan"; + 
case T_SampleScan: + return "SampleScan"; + case T_Gather: + return "Gather"; + case T_GatherMerge: + return "GatherMerge"; + case T_IndexScan: + return "IndexScan"; + case T_IndexOnlyScan: + return "IndexOnlyScan"; + case T_BitmapIndexScan: + return "BitmapIndexScan"; + case T_BitmapHeapScan: + return "BitmapHeapScan"; + case T_TidScan: + return "TidScan"; + case T_TidRangeScan: + return "TidRangeScan"; + case T_SubqueryScan: + return "SubqueryScan"; + case T_FunctionScan: + return "FunctionScan"; + case T_TableFuncScan: + return "TableFunctionScan"; + case T_ValuesScan: + return "ValuesScan"; + case T_CteScan: + return "CTEScan"; + case T_NamedTuplestoreScan: + return "NamedTuplestoreScan"; + case T_WorkTableScan: + return "WorkTableScan"; + case T_ForeignScan: + switch (((ForeignScan *) plan)->operation) + { + case CMD_SELECT: + return "ForeignScan"; + case CMD_INSERT: + return "ForeignInsert"; + case CMD_UPDATE: + return "ForeignUpdate"; + case CMD_DELETE: + return "ForeignDelete"; + default: + return "???"; + } + case T_CustomScan: + custom_name = ((CustomScan *) plan)->methods->CustomName; + if (custom_name) + return psprintf("CustomScan (%s)", custom_name); + else + return "CustomScan"; + case T_Material: + return "Materialize"; + case T_Memoize: + return "Memoize"; + case T_Sort: + return "Sort"; + case T_IncrementalSort: + return "IncrementalSort"; + case T_Group: + return "Group"; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + switch (agg->aggstrategy) + { + case AGG_PLAIN: + return "Aggregate"; + case AGG_SORTED: + return "GroupAggregate"; + case AGG_HASHED: + return "HashAggregate"; + case AGG_MIXED: + return "MixedAggregate"; + default: + return "Aggregate ???"; + } + } + case T_WindowAgg: + return "WindowAgg"; + case T_Unique: + return "Unique"; + case T_SetOp: + switch (((SetOp *) plan)->strategy) + { + case SETOP_SORTED: + return "SetOp"; + case SETOP_HASHED: + return "HashSetOp"; + default: + return "SetOp ???"; + } + case T_LockRows: + return 
"LockRows"; + case T_Limit: + return "Limit"; + case T_Hash: + return "Hash"; + default: + return "???"; + } +} diff --git a/contrib/pg_tracing/pg_tracing_explain.h b/contrib/pg_tracing/pg_tracing_explain.h new file mode 100644 index 0000000000..6551e15d61 --- /dev/null +++ b/contrib/pg_tracing/pg_tracing_explain.h @@ -0,0 +1,20 @@ +#ifndef _PG_TRACING_EXPLAIN_H_ +#define _PG_TRACING_EXPLAIN_H_ + +#include "span.h" +#include "executor/execdesc.h" + +/* Context needed when generating spans from planstate */ +typedef struct planstateTraceContext +{ + pgTracingTrace *trace; + int sql_error_code; + List *ancestors; + List *deparse_ctx; + List *rtable_names; +} planstateTraceContext; + +extern const char *plan_to_span_name(const Plan *plan); +extern const char *plan_to_operation(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const char *spanName); + +#endif diff --git a/contrib/pg_tracing/query_process.c b/contrib/pg_tracing/query_process.c new file mode 100644 index 0000000000..37a8c2b22c --- /dev/null +++ b/contrib/pg_tracing/query_process.c @@ -0,0 +1,475 @@ +#include "query_process.h" +#include "pg_tracing.h" + +#include +#include +#include "parser/scanner.h" +#include "nodes/extensible.h" + +/* + * Initialize traceparent parameter fields + */ +void +initialize_traceparent(pgTracingTrace * traceparent_parameter) +{ + traceparent_parameter->sampled = 0; + traceparent_parameter->trace_id = 0; + traceparent_parameter->parent_id = 0; +} + +/* + * Extract traceparent parameters from SQLCommenter + * + * We're expecting the query to start with a SQLComment containing the + * traceparent parameter + * "/\*traceparent='00-00000000000000000000000000000009-0000000000000005-01'*\/ + * SELECT 1;" + * Traceparent has the following format: version-traceid-parentid-sampled + * We also accept SQLCommenter as a parameter. In this case, we don't have + * the start and end of comments. 
+ * + * if is_parameter is true, the SQLComment is passed as a query parameter and + * won't have the comment start and end. + */ +pgTracingTrace +extract_traceparent(const char *sqlcomment_str, bool is_parameter) +{ + const char *expected_start = "/*"; + const char *traceparent; + const char *end_sqlcomment; + char *endptr; + pgTracingTrace pgTracingTrace = {}; + + if (!is_parameter) + { + /* + * Look for the start of a comment We're expecting the comment to be + * at the very start of the query + */ + for (size_t i = 0; i < strlen(expected_start); i++) + { + if (sqlcomment_str[i] != expected_start[i]) + return pgTracingTrace; + } + + /* + * Check that we have an end + */ + end_sqlcomment = strstr(sqlcomment_str, "*/"); + if (end_sqlcomment == NULL) + return pgTracingTrace; + } + else + end_sqlcomment = sqlcomment_str + strlen(sqlcomment_str); + + /* + * Locate traceparent parameter and make sure it has the expected size + * "traceparent" + "=" + '' -> 13 characters + * 00-00000000000000000000000000000009-0000000000000005-01 -> 55 + * characters + */ + traceparent = strstr(sqlcomment_str, "traceparent='"); + if (traceparent == NULL || traceparent > end_sqlcomment || end_sqlcomment - traceparent < 55 + 13) + return pgTracingTrace; + + /* + * Move to the start of the traceparent values + */ + traceparent = traceparent + 13; + + /* + * Check that '-' are at the expected places + */ + if (traceparent[2] != '-' || traceparent[35] != '-' || traceparent[52] != '-') + return pgTracingTrace; + + /* + * Parse traceparent parameters + */ + errno = 0; + pgTracingTrace.trace_id = strtol(&traceparent[3], &endptr, 16); + if (endptr != traceparent + 35 || errno) + return pgTracingTrace; + pgTracingTrace.parent_id = strtol(&traceparent[36], &endptr, 16); + if (endptr != traceparent + 52 || errno) + return pgTracingTrace; + pgTracingTrace.sampled = strtol(&traceparent[53], &endptr, 16); + if (endptr != traceparent + 55 || errno) + + /* + * Just to be sure, reset sampled on error + 
 */ + pgTracingTrace.sampled = 0; + return pgTracingTrace; +} + + +/* + * comp_location: comparator for qsorting LocationLen structs by location + */ +static int +comp_location(const void *a, const void *b) +{ + int l = ((const LocationLen *) a)->location; + int r = ((const LocationLen *) b)->location; + + if (l < r) + return -1; + else if (l > r) + return +1; + else + return 0; +} + + +/* + * Normalise query and fill paramStr. + * Normalised query will separate tokens with a single space and + * parameters are replaced by $1, $2... + * Parameters are put in the paramStr which will contain all parameters values + * using the format: "$1 = 0, $2 = 'v'" + */ +const char * +normalise_query_parameters(const JumbleState *jstate, const char *query, + int query_loc, int *query_len_p, char **paramStr) +{ + char *norm_query; + int query_len = *query_len_p; + int norm_query_buflen, /* Space allowed for norm_query */ + n_quer_loc = 0; + LocationLen *locs; + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int current_loc = 0; + StringInfoData buf; + + initStringInfo(&buf); + + norm_query_buflen = query_len + jstate->clocations_count * 10; + Assert(norm_query_buflen > 0); + locs = jstate->clocations; + + /* Allocate result buffer */ + norm_query = palloc(norm_query_buflen + 1); + + if (jstate->clocations_count > 1) + qsort(jstate->clocations, jstate->clocations_count, + sizeof(LocationLen), comp_location); + + /* initialize the flex scanner --- should match raw_parser() */ + yyscanner = scanner_init(query + query_loc, + &yyextra, + &ScanKeywords, + ScanKeywordTokens); + + for (;;) + { + int loc = locs[current_loc].location; + int tok; + + loc -= query_loc; + + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* + * We should not hit end-of-string, but if we do, behave sanely + */ + if (tok == 0) + break; /* out of inner for-loop */ + if (yylloc > query_len) + break; + + /* + * We should find the token position exactly, but if we 
somehow run + * past it, work with that. + */ + if (current_loc < jstate->clocations_count && yylloc >= loc) + { + appendStringInfo(&buf, + "%s$%d = ", + current_loc > 0 ? ", " : "", + current_loc + 1); + if (query[loc] == '-') + { + /* + * It's a negative value - this is the one and only case where + * we replace more than a single token. + * + * Do not compensate for the core system's special-case + * adjustment of location to that of the leading '-' operator + * in the event of a negative constant. It is also useful for + * our purposes to start from the minus symbol. In this way, + * queries like "select * from foo where bar = 1" and "select * + * from foo where bar = -2" will have identical normalized + * query strings. + */ + appendStringInfoChar(&buf, '-'); + tok = core_yylex(&yylval, &yylloc, yyscanner); + if (tok == 0) + break; /* out of inner for-loop */ + } + if (yylloc > 0 && yyextra.scanbuf[yylloc - 1] == ' ' && n_quer_loc > 0) + { + norm_query[n_quer_loc++] = ' '; + } + + /* + * Append the current parameter $x in the normalised query + */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + current_loc + 1 + jstate->highest_extern_param_id); + + appendStringInfoString(&buf, yyextra.scanbuf + yylloc); + + current_loc++; + } + else + { + int to_copy; + + if (yylloc > 0 && yyextra.scanbuf[yylloc - 1] == ' ' && n_quer_loc > 0) + { + norm_query[n_quer_loc++] = ' '; + } + to_copy = strlen(yyextra.scanbuf + yylloc); + Assert(n_quer_loc + to_copy < norm_query_buflen + 1); + memcpy(norm_query + n_quer_loc, yyextra.scanbuf + yylloc, to_copy); + n_quer_loc += to_copy; + } + } + scanner_finish(yyscanner); + + *query_len_p = n_quer_loc; + norm_query[n_quer_loc] = '\0'; + *paramStr = buf.data; + return norm_query; +} + +/* + * Normalise query: tokens will be separated by a single space + */ +const char * +normalise_query(const char *query, int query_loc, int *query_len_p) +{ + char *norm_query; + int query_len = *query_len_p; + int norm_query_buflen = 
query_len;
+	int			n_quer_loc = 0;
+	core_yyscan_t yyscanner;
+	core_yy_extra_type yyextra;
+	core_YYSTYPE yylval;
+	YYLTYPE		yylloc;
+
+	/* Allocate result buffer */
+	norm_query = palloc(norm_query_buflen + 2);
+
+	/* initialize the flex scanner --- should match raw_parser() */
+	yyscanner = scanner_init(query + query_loc, &yyextra, &ScanKeywords, ScanKeywordTokens);
+	for (;;)
+	{
+		int			tok;
+		int			to_copy;
+
+		tok = core_yylex(&yylval, &yylloc, yyscanner);
+
+		if (tok == 0)
+			break;				/* out of inner for-loop */
+		if (yylloc > query_len)
+			break;
+
+		/* Emit a single separating space iff the original query had one here */
+		if (yylloc > 0 && yyextra.scanbuf[yylloc - 1] == ' ' && n_quer_loc > 0)
+		{
+			norm_query[n_quer_loc++] = ' ';
+		}
+		to_copy = strlen(yyextra.scanbuf + yylloc);
+		Assert(n_quer_loc + to_copy < norm_query_buflen + 2);
+		memcpy(norm_query + n_quer_loc, yyextra.scanbuf + yylloc, to_copy);
+		n_quer_loc += to_copy;
+	}
+	scanner_finish(yyscanner);
+
+	*query_len_p = n_quer_loc;
+	norm_query[n_quer_loc] = '\0';
+	return norm_query;
+}
+
+/*
+ * Store text in the pg_tracing stat file.
+ *
+ * Reserves text_len + 1 bytes at the current extent under the spinlock,
+ * then writes the text plus a NUL terminator at that reserved offset.
+ * On success, *query_offset receives the offset of the stored text and
+ * true is returned; on any failure false is returned and a LOG message
+ * is emitted.
+ */
+bool
+text_store_file(pgTracingSharedState * pg_tracing, const char *text, int text_len,
+				Size *query_offset)
+{
+	Size		off;
+	int			fd;
+
+	/*
+	 * Reserve our slot and advertise ourselves as an in-progress writer.
+	 *
+	 * NOTE(review): n_writers presumably lets a concurrent reader/garbage
+	 * collector detect in-flight writes, as in pg_stat_statements --
+	 * confirm against the callers that decrement/observe it.
+	 */
+	{
+		volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing;
+
+		SpinLockAcquire(&s->mutex);
+		off = s->extent;
+		s->extent += text_len + 1;
+		s->n_writers++;
+		SpinLockRelease(&s->mutex);
+	}
+
+	/*
+	 * Don't allow the file to grow larger than what qtext_load_file can
+	 * (theoretically) handle. This has been seen to be reachable on 32-bit
+	 * platforms.
+	 */
+	if (unlikely(text_len >= MaxAllocHugeSize - off))
+	{
+		errno = EFBIG;			/* not quite right, but it'll do */
+		fd = -1;
+		goto error;
+	}
+
+	/* Now write the data into the successfully-reserved part of the file */
+	fd = OpenTransientFile(PG_TRACING_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
+	if (fd < 0)
+		goto error;
+
+	if (pg_pwrite(fd, text, text_len, off) != text_len)
+		goto error;
+	if (pg_pwrite(fd, "\0", 1, off + text_len) != 1)
+		goto error;
+
+	CloseTransientFile(fd);
+
+	/* Mark our write complete */
+	{
+		volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing;
+
+		SpinLockAcquire(&s->mutex);
+		s->n_writers--;
+		SpinLockRelease(&s->mutex);
+	}
+
+	/*
+	 * Set offset once write was successful
+	 */
+	*query_offset = off;
+
+	return true;
+
+error:
+	ereport(LOG,
+			(errcode_for_file_access(),
+			 errmsg("could not write file \"%s\": %m",
+					PG_TRACING_TEXT_FILE)));
+
+	if (fd >= 0)
+		CloseTransientFile(fd);
+
+	/* Mark our write complete */
+	{
+		volatile pgTracingSharedState *s = (volatile pgTracingSharedState *) pg_tracing;
+
+		SpinLockAcquire(&s->mutex);
+		s->n_writers--;
+		SpinLockRelease(&s->mutex);
+	}
+
+	return false;
+}
+
+/*
+ * Read the external query text file into a malloc'd buffer.
+ *
+ * Returns NULL (without throwing an error) if unable to read, eg file not
+ * there or insufficient memory.
+ *
+ * On success, the buffer size is also returned into *buffer_size.
+ *
+ * This can be called without any lock on pgss->lock, but in that case the
+ * caller is responsible for verifying that the result is sane.
+ */ +const char * +qtext_load_file(Size *buffer_size) +{ + char *buf; + int fd; + struct stat stat; + Size nread; + + fd = OpenTransientFile(PG_TRACING_TEXT_FILE, O_RDONLY | PG_BINARY); + if (fd < 0) + { + if (errno != ENOENT) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + PG_TRACING_TEXT_FILE))); + return NULL; + } + + /* Get file length */ + if (fstat(fd, &stat)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + PG_TRACING_TEXT_FILE))); + CloseTransientFile(fd); + return NULL; + } + + /* Allocate buffer; beware that off_t might be wider than size_t */ + if (stat.st_size <= MaxAllocHugeSize) + buf = (char *) malloc(stat.st_size); + else + buf = NULL; + if (buf == NULL) + { + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Could not allocate enough memory to read file \"%s\".", + PG_TRACING_TEXT_FILE))); + CloseTransientFile(fd); + return NULL; + } + + /* + * OK, slurp in the file. Windows fails if we try to read more than + * INT_MAX bytes at once, and other platforms might not like that either, + * so read a very large file in 1GB segments. + */ + nread = 0; + while (nread < stat.st_size) + { + int toread = Min(1024 * 1024 * 1024, stat.st_size - nread); + + /* + * If we get a short read and errno doesn't get set, the reason is + * probably that garbage collection truncated the file since we did + * the fstat(), so we don't log a complaint --- but we don't return + * the data, either, since it's most likely corrupt due to concurrent + * writes from garbage collection. 
+ */ + errno = 0; + if (read(fd, buf + nread, toread) != toread) + { + if (errno) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + PG_TRACING_TEXT_FILE))); + free(buf); + CloseTransientFile(fd); + return NULL; + } + nread += toread; + } + + if (CloseTransientFile(fd) != 0) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", PG_TRACING_TEXT_FILE))); + + *buffer_size = nread; + return buf; +} diff --git a/contrib/pg_tracing/query_process.h b/contrib/pg_tracing/query_process.h new file mode 100644 index 0000000000..ce37b2e0d4 --- /dev/null +++ b/contrib/pg_tracing/query_process.h @@ -0,0 +1,30 @@ +#ifndef _QUERY_PROCESS_H_ +#define _QUERY_PROCESS_H_ + +#include "pg_tracing.h" +#include "nodes/queryjumble.h" +#include "parser/parse_node.h" + +/* + * Normalise query: - Comments are removed - Constants are replaced by $x - + * All tokens are separated by a single space + */ +extern const char *normalise_query_parameters(const JumbleState *jstate, const char *query, + int query_loc, int *query_len_p, char **paramStr); + +extern pgTracingTrace extract_traceparent(const char *query_str, bool is_parameter); + +/* + * Normalise simple query + */ +extern const char *normalise_query(const char *query, int query_loc, int *query_len_p); +bool text_store_file(pgTracingSharedState * pg_tracing, const char *query, + int query_len, Size *query_offset); +extern const char *qtext_load_file(Size *buffer_size); +extern const char *qtext_load_file(Size *buffer_size); +extern void initialize_traceparent(pgTracingTrace * traceparent_parameter); + +/* Location of external text file */ +#define PG_TRACING_TEXT_FILE PG_STAT_TMP_DIR "/pg_tracing.stat" + +#endif diff --git a/contrib/pg_tracing/span.c b/contrib/pg_tracing/span.c new file mode 100644 index 0000000000..d2528cb3e7 --- /dev/null +++ b/contrib/pg_tracing/span.c @@ -0,0 +1,311 @@ +#include "span.h" + +#include "nodes/extensible.h" +#include "common/pg_prng.h" 
+ + +/* + * Convert a node CmdType to the matching SpanType + */ +SpanType +command_type_to_span_type(CmdType cmd_type) +{ + switch (cmd_type) + { + case CMD_SELECT: + return SPAN_NODE_SELECT; + case CMD_INSERT: + return SPAN_NODE_INSERT; + case CMD_UPDATE: + return SPAN_NODE_UPDATE; + case CMD_DELETE: + return SPAN_NODE_DELETE; + case CMD_MERGE: + return SPAN_NODE_MERGE; + case CMD_UTILITY: + return SPAN_NODE_UTILITY; + case CMD_NOTHING: + return SPAN_NODE_NOTHING; + case CMD_UNKNOWN: + return SPAN_NODE_UNKNOWN; + } + return SPAN_NODE_UNKNOWN; +} + +/* + * Initialize span fields + */ +static void +initialize_span_fields(Span * span, SpanType type, const pgTracingTrace * trace, uint64 parent_id, uint64 query_id, int nested_level) +{ + span->trace_id = trace->trace_id; + span->type = type; + + /* + * If parent id is unset, it means that there's no propagated trace + * informations from the caller. In this case, this is the top span, + * span_id == parent_id and we reuse the generated trace id for the span + * id. + */ + if (parent_id == 0) + { + span->parent_id = trace->trace_id; + span->span_id = trace->trace_id; + } + else + { + span->parent_id = parent_id; + span->span_id = pg_prng_uint64(&pg_global_prng_state); + } + span->name_offset = -1; + span->operation_name_offset = -1; + span->parameter_offset = -1; + span->sql_error_code = 0; + span->startup = 0; + span->be_pid = MyProcPid; + span->subxact_count = MyProc->subxidStatus.count; + span->duration_ns = 0; + span->ended = false; + span->query_id = query_id; + span->nested_level = nested_level; + memset(&span->node_counters, 0, sizeof(NodeCounters)); + memset(&span->plan_counters, 0, sizeof(PlanCounters)); + + /* + * Store the starting buffer for planner and process utility spans + * + * TODO: Do we need wal buffer for planner? + */ + if (type == SPAN_PLANNER || span->type == SPAN_PROCESS_UTILITY) + { + /* TODO Do we need to track wal for planner? 
*/ + span->node_counters.buffer_usage = pgBufferUsage; + span->node_counters.wal_usage = pgWalUsage; + } +} + +/* + * Initialize span and set span starting time. + * + * A span needs a start timestamp and a duration. + * Given that spans can have a very short duration (less than 1ms), we + * need to rely on monotonic clock as much as possible to have the best precision. + * + * For that, after we established that a query was sampled, we get both the start + * timestamp and instr_time. All subsequent times will be taken instr_time and we + * deduct the starting time from the delta between the current instr_time and the + * start_instr_time. + * + * We provide multiple times: + * - start_trace_ts is the earliest timestamp we've captured for the current trace. + * - start_trace_instr_time is the instr_time taken at the same moment as the start_trace_ts. + * - start_span_time is an optional start instr_time of the provided span. It will be used + * to get the start timestamp of the span. If nothing is provided, we use the current instr_time. + * This parameter is mostly used when generating spans from planstate as we need to rely on the + * query instrumentation to find the node start. + */ +void +begin_span(Span * span, SpanType type, const pgTracingTrace * trace, uint64 parent_id, uint64 query_id, const int64 *start_span, int nested_level) +{ + int64 start_ns; + + initialize_span_fields(span, type, trace, parent_id, query_id, nested_level); + + /* + * If no start span is provided, get the current one + */ + if (start_span == NULL) + start_ns = get_current_ns(); + else + start_ns = *start_span; + + /* + * We use duration to temporarily store ns at span start + */ + span->duration_ns = start_ns; + + /* + * Compute the span timestamp start by using delta instr_time between + * start trace and start span. 
+ */ + span->start = trace->start_trace.ts + (start_ns - trace->start_trace.ns) / NS_PER_US; +} + +/* + * Set span duration and accumulated buffers + * end_span_input is optional, if NULL is passed, we use + * the current time + */ +void +end_span(Span * span, const int64 *end_span_input) +{ + BufferUsage buffer_usage; + WalUsage wal_usage; + int64 end_span_ns; + + /* + * We used span->duration as a temporary storage for the instr_time at the + * begining of the span, fetch it + */ + int64 start_span_ns = span->duration_ns; + + Assert(start_span_ns > 0); + Assert(span->trace_id > 0); + + /* Span should be ended only once */ + Assert(span->ended == false); + span->ended = true; + + /* + * Set span duration with the end time before substrating the start + */ + if (end_span_input == NULL) + end_span_ns = get_current_ns(); + else + end_span_ns = *end_span_input; + span->duration_ns = end_span_ns - start_span_ns; + + if (span->type == SPAN_PLANNER || span->type == SPAN_PROCESS_UTILITY) + { + /* calc differences of buffer counters. */ + memset(&buffer_usage, 0, sizeof(BufferUsage)); + BufferUsageAccumDiff(&buffer_usage, &pgBufferUsage, + &span->node_counters.buffer_usage); + span->node_counters.buffer_usage = buffer_usage; + + /* calc differences of WAL counters. */ + memset(&wal_usage, 0, sizeof(wal_usage)); + WalUsageAccumDiff(&wal_usage, &pgWalUsage, + &span->node_counters.wal_usage); + span->node_counters.wal_usage = wal_usage; + } +} + +/* + * Get the name of a span. + * If it is a node span, the name may be pulled from the stat file. 
+ */ +const char * +get_span_name(const Span * span, const char *qbuffer, Size qbuffer_size) +{ + if (span->name_offset != -1) + { + if (span->name_offset <= qbuffer_size && qbuffer_size > 0) + return qbuffer + span->name_offset; + else if (span->name_offset > qbuffer_size && qbuffer_size > 0) + return qbuffer + qbuffer_size; + } + + switch (span->type) + { + case SPAN_POST_PARSE: + return "Post Parse"; + case SPAN_PARSE: + return "Parse"; + case SPAN_PLANNER: + return "Planner"; + case SPAN_FUNCTION: + return "Function"; + case SPAN_PROCESS_UTILITY: + return "ProcessUtility"; + case SPAN_EXECUTOR_START: + return "Executor"; + case SPAN_EXECUTOR_RUN: + return "Executor"; + case SPAN_EXECUTOR_END: + return "Executor"; + case SPAN_EXECUTOR_FINISH: + return "Executor"; + + case SPAN_NODE_INIT_PLAN: + return "InitPlan"; + case SPAN_NODE_SUBPLAN: + return "SubPlan"; + + case SPAN_NODE_SELECT: + return "Select"; + case SPAN_NODE_INSERT: + return "Insert"; + case SPAN_NODE_UPDATE: + return "Update"; + case SPAN_NODE_DELETE: + return "Delete"; + case SPAN_NODE_MERGE: + return "Merge"; + case SPAN_NODE_UTILITY: + return "Utility"; + case SPAN_NODE_NOTHING: + return "Nothing"; + case SPAN_NODE_UNKNOWN: + return "Unknown"; + + /* Check name offset for span node */ + case SPAN_NODE: + return "Node"; + } + return "???"; +} + +/* + * Get the operation of a span. + * If it is a node span, the name may be pulled from the stat file. 
+ */ +const char * +get_operation_name(const Span * span, const char *qbuffer, Size qbuffer_size) +{ + if (span->operation_name_offset != -1) + { + if (span->operation_name_offset <= qbuffer_size && qbuffer_size > 0) + return qbuffer + span->operation_name_offset; + else if (span->operation_name_offset > qbuffer_size && qbuffer_size > 0) + return qbuffer + qbuffer_size; + } + + switch (span->type) + { + case SPAN_POST_PARSE: + return "Post Parse"; + case SPAN_PARSE: + return "Parse"; + case SPAN_PLANNER: + return "Planner"; + case SPAN_FUNCTION: + return "Function"; + case SPAN_PROCESS_UTILITY: + return "ProcessUtility"; + case SPAN_EXECUTOR_START: + return "Start"; + case SPAN_EXECUTOR_RUN: + return "Run"; + case SPAN_EXECUTOR_END: + return "End"; + case SPAN_EXECUTOR_FINISH: + return "Finish"; + /* Node should have name offset set */ + case SPAN_NODE_SELECT: + case SPAN_NODE_INSERT: + case SPAN_NODE_UPDATE: + case SPAN_NODE_DELETE: + case SPAN_NODE_MERGE: + case SPAN_NODE_UTILITY: + case SPAN_NODE_NOTHING: + case SPAN_NODE_INIT_PLAN: + case SPAN_NODE_SUBPLAN: + case SPAN_NODE_UNKNOWN: + case SPAN_NODE: + return "Node"; + } + return "Unknown type"; +} + +/* +* Get the current nanoseconds value +*/ +int64 +get_current_ns(void) +{ + instr_time t; + + INSTR_TIME_SET_CURRENT(t); + return INSTR_TIME_GET_NANOSEC(t); +} diff --git a/contrib/pg_tracing/span.h b/contrib/pg_tracing/span.h new file mode 100644 index 0000000000..109f2eb7a9 --- /dev/null +++ b/contrib/pg_tracing/span.h @@ -0,0 +1,153 @@ +/*------------------------------------------------------------------------- + * contrib/pg_tracing/span.h + * + * Header for span. 
+ *
+ * IDENTIFICATION
+ *	contrib/pg_tracing/span.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _SPAN_H_
+#define _SPAN_H_
+
+#include "postgres.h"
+
+#include "jit/jit.h"
+#include "pgstat.h"
+#include "access/transam.h"
+
+/*
+ * SpanType: Type of the span
+ */
+typedef enum
+{
+	SPAN_PARSE,					/* Wraps query parsing */
+	SPAN_POST_PARSE,			/* Wraps post parsing */
+	SPAN_PLANNER,				/* Wraps planner execution in planner hook */
+	SPAN_FUNCTION,				/* Wraps function in fmgr hook */
+	SPAN_PROCESS_UTILITY,		/* Wraps ProcessUtility execution */
+
+	SPAN_EXECUTOR_START,		/* Executor Spans wrapping the matching hooks */
+	SPAN_EXECUTOR_RUN,
+	SPAN_EXECUTOR_END,
+	SPAN_EXECUTOR_FINISH,
+
+	SPAN_NODE_INIT_PLAN,
+	SPAN_NODE_SUBPLAN,
+
+	SPAN_NODE,					/* Represents a node execution, generated from
+								 * planstate */
+
+	SPAN_NODE_SELECT,			/* Query Span types. They are created from the
+								 * query cmdType */
+	SPAN_NODE_INSERT,
+	SPAN_NODE_UPDATE,
+	SPAN_NODE_DELETE,
+	SPAN_NODE_MERGE,
+	SPAN_NODE_UTILITY,
+	SPAN_NODE_NOTHING,
+	SPAN_NODE_UNKNOWN,
+} SpanType;
+
+
+/*
+ * Counters extracted from query instrumentation
+ */
+typedef struct NodeCounters
+{
+	int64		rows;			/* # of tuples processed */
+	int64		nloops;			/* # of cycles for this node */
+
+	BufferUsage buffer_usage;	/* total buffer usage for this node */
+	WalUsage	wal_usage;		/* total WAL usage for this node */
+	JitInstrumentation jit_usage;	/* total JIT usage for this node */
+} NodeCounters;
+
+/*
+ * Counters extracted from query's plan
+ */
+typedef struct PlanCounters
+{
+	/*
+	 * estimated execution costs for plan (see costsize.c for more info)
+	 */
+	Cost		startup_cost;	/* cost expended before fetching any tuples */
+	Cost		total_cost;		/* total cost (assuming all tuples fetched) */
+
+	/*
+	 * planner's estimate of result size of this plan step
+	 */
+	Cardinality plan_rows;		/* number of rows plan is expected to emit */
+	int			plan_width;		/* average row width in bytes */
+} PlanCounters;
+
+/* Paired wall-clock timestamp and monotonic-clock nanoseconds taken at the
+ * same instant; used to derive span timestamps from monotonic deltas. */
+typedef struct
+{
+	TimestampTz ts;
+	int64		ns;
+} pgTracingStartTime;
+
+/* Per-statement trace context, propagated through SQLCommenter's traceparent */
+typedef struct pgTracingTrace
+{
+	uint64		trace_id;		/* Id of the trace */
+	uint64		parent_id;		/* Span id of the parent */
+	int			sampled;		/* Is current statement sampled? */
+	uint64		query_id;		/* Query id of the current statement */
+	pgTracingStartTime start_trace;
+} pgTracingTrace;
+
+/*
+ * The Span data structure represents an operation with a start, a duration.
+ * It contains the minimum needed to represent a span and serves as a base
+ * for the SpanNode. It is used for simple operations that don't need
+ * significant metadata like Executor spans.
+ */
+typedef struct Span
+{
+	uint64		trace_id;		/* Trace id extracted from the SQLCommenter's
+								 * traceparent */
+	uint64		span_id;		/* Span Identifier generated from a random
+								 * uint64 */
+	uint64		parent_id;		/* Span's parent id. For the top span, it's
+								 * extracted from SQLCommenter's traceparent.
+								 * For other spans, we pass the parent's span. */
+	uint64		query_id;
+
+	TimestampTz start;			/* Start of the span. */
+	int64		duration_ns;	/* Duration of the span in nanoseconds. */
+	SpanType	type;			/* Type of the span. Used to generate the
+								 * span's name for all spans except SPAN_NODE. */
+	int			be_pid;			/* Pid of the backend process */
+	bool		top_span;
+	bool		ended;			/* Track if the span was already ended */
+	uint8		nested_level;	/* Only used for debugging */
+	uint8		subxact_count;	/* Active count of backend's subtransaction */
+
+	/*
+	 * We store variable size metadata in an external file. Those represent
+	 * the position of NULL terminated strings in the file. Set to -1 if
+	 * unused.
+	 *
+	 * NOTE(review): these fields are declared Size (unsigned) yet use -1 as
+	 * the "unset" sentinel, relying on unsigned wraparound; confirm all
+	 * comparisons against -1 behave as intended or switch to a signed type.
+	 */
+	Size		name_offset;	/* span name offset in external file */
+	Size		operation_name_offset;	/* operation name offset in external
+										 * file */
+	Size		parameter_offset;	/* parameters offset in external file */
+
+	PlanCounters plan_counters; /* Counters with plan costs */
+	NodeCounters node_counters; /* Counters with node costs (jit, wal,
+								 * buffers) */
+	int64		startup;		/* Time to the first tuple */
+	int			sql_error_code; /* query error code extracted from ErrorData,
+								 * 0 if query was successful */
+} Span;
+
+extern void begin_span(Span * span, SpanType type, const pgTracingTrace * trace, uint64 parent_id, uint64 query_id, const int64 *start_span, int nested_level);
+extern void end_span(Span * span, const int64 *end_time);
+
+extern SpanType command_type_to_span_type(CmdType cmd_type);
+extern const char *get_span_name(const Span * span, const char *qbuffer, Size qbuffer_size);
+extern const char *get_operation_name(const Span * span, const char *qbuffer, Size qbuffer_size);
+extern int64 get_current_ns(void);
+
+#endif
diff --git a/contrib/pg_tracing/sql/cleanup.sql b/contrib/pg_tracing/sql/cleanup.sql
new file mode 100644
index 0000000000..ab11333d4e
--- /dev/null
+++ b/contrib/pg_tracing/sql/cleanup.sql
@@ -0,0 +1,3 @@
+DROP TABLE pg_tracing_test;
+DROP function test_function;
+DROP EXTENSION pg_tracing;
diff --git a/contrib/pg_tracing/sql/create_table.sql b/contrib/pg_tracing/sql/create_table.sql
new file mode 100644
index 0000000000..87875a113f
--- /dev/null
+++ b/contrib/pg_tracing/sql/create_table.sql
@@ -0,0 +1 @@
+CREATE TABLE pg_tracing_test (a int, b char(20));
diff --git a/contrib/pg_tracing/sql/nested.sql b/contrib/pg_tracing/sql/nested.sql
new file mode 100644
index 0000000000..59d52b319b
--- /dev/null
+++ b/contrib/pg_tracing/sql/nested.sql
@@ -0,0 +1,78 @@
+-- Create test functions
+CREATE OR REPLACE FUNCTION test_function(a int) RETURNS SETOF oid AS
+$BODY$
+BEGIN
+	RETURN QUERY SELECT oid from pg_class where oid = a;
+END;
+$BODY$
+LANGUAGE
plpgsql; + +CREATE OR REPLACE PROCEDURE sum_one(i int) AS $$ +DECLARE + r int; +BEGIN + SELECT (i + i)::int INTO r; +END; $$ LANGUAGE plpgsql; + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000051-0000000000000051-01'*/ select test_function(1); + +SELECT span_id AS top_span, + extract(epoch from span_start) as top_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_end + from pg_tracing_spans(false) where parent_id=81 and name!='Parse' \gset +SELECT span_id AS top_run_span, + extract(epoch from span_start) as top_run_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_run_end + from pg_tracing_spans(false) where parent_id=:top_span and name='Executor' and resource='Run' \gset +SELECT span_id AS top_project, + extract(epoch from span_start) as top_project_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_project_end + from pg_tracing_spans(false) where parent_id=:top_run_span and name='ProjectSet' \gset +SELECT span_id AS top_result, + extract(epoch from span_start) as top_result_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_result_end + from pg_tracing_spans(false) where parent_id=:top_project and name='Result' \gset +SELECT span_id AS nested_select, + extract(epoch from span_start) as select_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as select_end + from pg_tracing_spans(false) where parent_id=:top_result and name='Select' \gset +SELECT span_id AS nested_run, + extract(epoch from span_start) as run_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as run_end + from pg_tracing_spans(false) where parent_id=:nested_select and resource='Run' \gset + +SELECT :top_start < :top_run_start AS top_query_before_run, + :top_end >= :top_run_end AS top_end_after_run_end, + + :top_run_start <= :top_project_start AS top_run_start_before_project, + + :top_run_end >= 
:top_project_end AS top_run_end_after_project_end, + :top_run_end >= :select_end AS top_run_end_before_select_end, + :top_run_end >= :run_end AS top_run_end_after_nested_run_end, + + :run_end >= :select_end AS run_end_after_select_end; + +WITH max_duration AS (select max(duration) from pg_tracing_spans(false)) +SELECT duration = max_duration.max from pg_tracing_spans(false), max_duration + where span_id = :top_span; + +SELECT resource from pg_tracing_spans(false) where parent_id=:nested_run order by resource; + + +-- Check tracking option +set pg_tracing.track = 'top'; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000052-0000000000000052-01'*/ select test_function(1); +SELECT count(*) from pg_tracing_spans where trace_id=82; +set pg_tracing.track = 'none'; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000053-0000000000000053-01'*/ select test_function(1); +SELECT count(*) from pg_tracing_spans where trace_id=83; +set pg_tracing.track = 'all'; + +/*traceparent='00-00000000000000000000000000000054-0000000000000054-01'*/ CALL sum_one(3); +SELECT resource from pg_tracing_spans order by span_start, duration desc, resource; + + +set pg_tracing.track_utility=off; + +/*traceparent='00-00000000000000000000000000000055-0000000000000055-01'*/ CALL sum_one(10); +SELECT resource from pg_tracing_spans order by span_start, duration desc, resource; diff --git a/contrib/pg_tracing/sql/parallel.sql b/contrib/pg_tracing/sql/parallel.sql new file mode 100644 index 0000000000..15e05b26bc --- /dev/null +++ b/contrib/pg_tracing/sql/parallel.sql @@ -0,0 +1,12 @@ +begin; +-- encourage use of parallel plans +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ select 1 from pg_class limit 1; 
+/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ select 2 from pg_class limit 2; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-00'*/ select 3 from pg_class limit 3; +commit; + +SELECT resource from pg_tracing_spans order by resource; diff --git a/contrib/pg_tracing/sql/reset.sql b/contrib/pg_tracing/sql/reset.sql new file mode 100644 index 0000000000..531db2172d --- /dev/null +++ b/contrib/pg_tracing/sql/reset.sql @@ -0,0 +1,3 @@ +-- Check reset is working +SELECT pg_tracing_reset(); +SELECT traces from pg_tracing_info; diff --git a/contrib/pg_tracing/sql/sample.sql b/contrib/pg_tracing/sql/sample.sql new file mode 100644 index 0000000000..6c9f8d7aa7 --- /dev/null +++ b/contrib/pg_tracing/sql/sample.sql @@ -0,0 +1,33 @@ +-- Trace nothing +SET pg_tracing.sample_rate = 0.0; +SET pg_tracing.caller_sample_rate = 0.0; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000021-0000000000000021-01'*/ SELECT 1; + +select count(distinct(trace_id)) from pg_tracing_spans(false); +select resource, parameters from pg_tracing_spans order by span_start, duration desc; + +-- Trace everything +SET pg_tracing.sample_rate = 1.0; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000022-0000000000000022-01'*/ SELECT 1; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000023-0000000000000023-00'*/ SELECT 2; +SELECT 3; +SELECT 4; + +select count(distinct(trace_id)) from pg_tracing_spans(false); +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; +-- Top spans should reuse generated ids +select resource, parameters from pg_tracing_spans(false) where trace_id = parent_id and parent_id = span_id order by span_start, duration desc; +select resource, parameters from pg_tracing_spans order by span_start, duration desc; + +-- Cleanup +SET plan_cache_mode='auto'; +SET pg_tracing.sample_rate = 0.0; + +-- Only trace query 
with sampled flag +SET pg_tracing.caller_sample_rate = 1.0; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000024-0000000000000024-01'*/ SELECT 1; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000025-0000000000000025-00'*/ SELECT 2; +SELECT 1; +select count(distinct(trace_id)) from pg_tracing_spans(false); +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; +select resource, parameters from pg_tracing_spans order by span_start; diff --git a/contrib/pg_tracing/sql/select.sql b/contrib/pg_tracing/sql/select.sql new file mode 100644 index 0000000000..2c92df7b13 --- /dev/null +++ b/contrib/pg_tracing/sql/select.sql @@ -0,0 +1,60 @@ +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ SELECT 1; + +-- Get top span id +SELECT span_id AS top_span_id from pg_tracing_spans(false) where parent_id=1 and name!='Parse' \gset + +-- Check parameters +SELECT parameters from pg_tracing_spans(false) where span_id=:top_span_id; + +-- Check the number of children +SELECT count(*) from pg_tracing_spans(false) where parent_id=:'top_span_id'; + +-- Check resource and query id +SELECT resource, query_id from pg_tracing_spans(false) where trace_id=1 order by span_start, duration desc, resource; + +-- Check reported number of trace +SELECT traces from pg_tracing_info; + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ SELECT * from current_database(); +SELECT resource from pg_tracing_spans where trace_id=3 order by resource; + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000004-0000000000000004-01'*/ SELECT s.relation_size + s.index_size +FROM (SELECT + pg_relation_size(C.oid) as relation_size, + pg_indexes_size(C.oid) as index_size + FROM pg_class C) as s limit 1; +SELECT resource from pg_tracing_spans where trace_id=4 order by resource; + +-- Check that we're in a correct state after a timeout +set 
statement_timeout=200; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000007-0000000000000007-01'*/ select * from pg_sleep(10); +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000008-0000000000000008-01'*/ select 1; +SELECT trace_id, resource, sql_error_code from pg_tracing_spans order by span_start, duration desc, resource; +set statement_timeout=0; + +-- Test prepared statement +PREPARE test_prepared (text, integer) AS /*$1*/ SELECT $2; +EXECUTE test_prepared('dddbs=''postgres.db'',traceparent=''00-00000000000000000000000000000009-0000000000000009-01''', 1); +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + +-- Check explain is untracked +explain (costs off) +select * from pg_class where oid < 0 limit 100; +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + +-- Never executed node +/*dddbs='postgres.db',traceparent='00-0000000000000000000000000000000a-000000000000000a-01'*/ select 1 limit 0; +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + +-- Test prepared statement with generic plan +SET plan_cache_mode='force_generic_plan'; +EXECUTE test_prepared('dddbs=''postgres.db'',traceparent=''00-00000000000000000000000000000010-0000000000000010-01''', 10); +SELECT trace_id, resource, parameters from pg_tracing_spans order by span_start, duration desc, resource; + +-- Test multiple statements +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000011-0000000000000012-01'*/ select 1; select 2; +select resource, parameters from pg_tracing_spans order by span_start, duration desc; + +-- Cleanup +SET plan_cache_mode='auto'; +DEALLOCATE test_prepared; diff --git a/contrib/pg_tracing/sql/subxact.sql b/contrib/pg_tracing/sql/subxact.sql new file mode 100644 index 0000000000..95408e6348 --- /dev/null +++ b/contrib/pg_tracing/sql/subxact.sql @@ -0,0 +1,12 @@ 
+SET pg_tracing.sample_rate = 1.0; +BEGIN; +SAVEPOINT s1; +INSERT INTO pg_tracing_test VALUES(generate_series(1, 2), 'aaa'); +SAVEPOINT s2; +INSERT INTO pg_tracing_test VALUES(generate_series(1, 2), 'aaa'); +SAVEPOINT s3; +SELECT 1; +COMMIT; + +select resource, parameters, subxact_count from pg_tracing_spans order by span_start, duration desc; + diff --git a/contrib/pg_tracing/sql/utility.sql b/contrib/pg_tracing/sql/utility.sql new file mode 100644 index 0000000000..4d08a27c1c --- /dev/null +++ b/contrib/pg_tracing/sql/utility.sql @@ -0,0 +1,72 @@ +-- Track statements in create extension +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ CREATE EXTENSION pg_tracing; + +SELECT span_id AS top_utility, + extract(epoch from span_start) as top_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_utility_end + from pg_tracing_spans(false) where parent_id=1 and name='Utility' \gset + +SELECT span_id AS top_process_utility, + extract(epoch from span_start) as top_process_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as top_process_utility_end + from pg_tracing_spans(false) where parent_id=:top_utility and name='ProcessUtility' \gset + +SELECT span_id AS first_utility, + extract(epoch from span_start) as first_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as first_utility_end + from pg_tracing_spans(false) where parent_id=:top_process_utility and name='Utility' limit 1 \gset + +SELECT span_id AS first_process_utility, + extract(epoch from span_start) as first_process_utility_start, + round(extract(epoch from span_start) + duration / 1000000000.0, 6) as first_process_utility_end + from pg_tracing_spans(false) where parent_id=:first_utility and name='ProcessUtility' \gset + +SELECT span_id AS second_utility, + extract(epoch from span_start) as second_utility_start, + round(extract(epoch from span_start) + duration / 
1000000000.0, 6) as second_utility_end + from pg_tracing_spans(false) where parent_id=:top_process_utility and name='Utility' limit 1 offset 1 \gset + +SELECT :top_utility_start < :top_process_utility_start AS top_utility_start_before_process_utility, + :top_utility_end >= :top_process_utility_end AS top_end_after_process_utility, + + :first_process_utility_end < :first_utility_end AS nested_process_utility_ends_before, + :first_utility_end < :second_utility_start AS second_utility_start_after_first_one; + +-- Clean current spans +select count(*) from pg_tracing_spans; + +-- Test utility off +SET pg_tracing.track_utility = off; + +-- Test prepared statement +PREPARE test_prepared_one_param (integer) AS SELECT $1; +EXECUTE test_prepared_one_param(100); + +select count(*) from pg_tracing_spans; + +-- Test prepared statement with generic plan +SET plan_cache_mode='force_generic_plan'; +EXECUTE test_prepared_one_param(200); + +select count(*) from pg_tracing_spans; + + +-- With utility on +SET pg_tracing.track_utility = on; + +-- Test prepared statement +PREPARE test_prepared_one_param_2 (integer) AS SELECT $1; +EXECUTE test_prepared_one_param_2(100); + +select count(distinct(trace_id)) from pg_tracing_spans(false); +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; +select resource, parameters from pg_tracing_spans where trace_id = parent_id and parent_id = span_id order by span_start; + +-- Test prepared statement with generic plan +SET plan_cache_mode='force_generic_plan'; +EXECUTE test_prepared_one_param(200); + +select count(distinct(trace_id)) from pg_tracing_spans(false); +select resource, parameters from pg_tracing_spans(false) order by span_start, duration desc; +select resource, parameters from pg_tracing_spans where trace_id = parent_id and parent_id = span_id order by span_start; + diff --git a/contrib/pg_tracing/sql/wal.sql b/contrib/pg_tracing/sql/wal.sql new file mode 100644 index 0000000000..10c0aa6a56 --- 
/dev/null +++ b/contrib/pg_tracing/sql/wal.sql @@ -0,0 +1,24 @@ +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ INSERT INTO pg_tracing_test VALUES(generate_series(1, 10), 'aaa'); +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ UPDATE pg_tracing_test SET b = 'bbb' WHERE a > 7; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ DELETE FROM pg_tracing_test WHERE a > 9; + +-- Check WAL is generated for the above statements +SELECT trace_id, name, resource, +wal_records > 0 as wal_records, +wal_bytes > 0 as wal_bytes +FROM pg_tracing_spans order by span_start, duration desc, resource; + +-- Redo the same but without wal instrumentation +set pg_tracing.instrument_wal = false; + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ INSERT INTO pg_tracing_test VALUES(generate_series(1, 10), 'aaa'); +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ UPDATE pg_tracing_test SET b = 'bbb' WHERE a > 7; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ DELETE FROM pg_tracing_test WHERE a > 9; + +SELECT trace_id, name, resource, +wal_records = 0 as wal_records, +wal_bytes = 0 as wal_bytes +FROM pg_tracing_spans order by span_start, duration desc, resource; + +-- Cleanup +set pg_tracing.instrument_wal = true; diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index ee78a5749d..1174e2450f 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -70,6 +70,7 @@ InstrStartNode(Instrumentation *instr) if (instr->need_timer && !INSTR_TIME_SET_CURRENT_LAZY(instr->starttime)) elog(ERROR, "InstrStartNode called twice in a row"); + INSTR_TIME_SET_CURRENT_LAZY(instr->firsttime); /* save buffer usage totals at node entry, if needed */ if (instr->need_bufusage) diff 
--git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 87e5e2183b..13840be1d7 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -72,6 +72,7 @@ typedef struct Instrumentation bool async_mode; /* true if node is in async mode */ /* Info about current plan cycle: */ bool running; /* true if we've completed first tuple */ + instr_time firsttime; /* start time of this node */ instr_time starttime; /* start time of current iteration of node */ instr_time counter; /* accumulated runtime for this node */ double firsttuple; /* time for first tuple of this cycle */