Re: [HACKERS] Cutting initdb's runtime (Perl question embedded) - Mailing list pgsql-hackers
| From | ilmari@ilmari.org (Dagfinn Ilmari Mannsåker) |
|---|---|
| Subject | Re: [HACKERS] Cutting initdb's runtime (Perl question embedded) |
| Date | |
| Msg-id | d8jpogbq8r1.fsf@dalvik.ping.uio.no Whole thread Raw |
| In response to | Re: [HACKERS] Cutting initdb's runtime (Perl question embedded) (Tom Lane <tgl@sss.pgh.pa.us>) |
| Responses |
Re: [HACKERS] Cutting initdb's runtime (Perl question embedded)
Re: [HACKERS] Cutting initdb's runtime (Perl question embedded) |
| List | pgsql-hackers |
Tom Lane <tgl@sss.pgh.pa.us> writes:
> There's certainly lots more that could be done in the genbki code,
> but I think all we can justify at this stage of the development
> cycle is to get the low-hanging fruit for testing speedups.
I threw Devel::NYTProf at it and picked some more low-hanging fruit.
Attached are separate patches for each change, and here are the runtimes
of genbki.pl and Gen_fmgrtab.pl, respectively, after each patch
(averages of 5 runs, in millseconds):
master (b6dd1271): 355, 182
1: Avoid unnecessary regex captures: 349, 183
2: Avoid repeated calls to SplitDataLine: 316, 158
3: Inline SplitDataLine: 291, 141
4: Inline check_natts: 287, 141
Together they shave 68ms or 19.2% off the runtime of genbki.pl and 41ms
or 22.5% off the runtime of Gen_fmgrtab.pl
Finally, one non-performance patch, which just removes the use of
Exporter in Catalog.pm, since none of the users actually import anything
from it.
- ilmari
--
"I use RMS as a guide in the same way that a boat captain would use
a lighthouse. It's good to know where it is, but you generally
don't want to find yourself in the same spot." - Tollef Fog Heen
From 5d7f4b1e78243daa6939501b88b2644bfcf9bd77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilmari@ilmari.org>
Date: Mon, 17 Apr 2017 13:26:35 +0100
Subject: [PATCH 1/8] Avoid unnecessary regex captures in Catalog.pm
---
src/backend/catalog/Catalog.pm | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index fa8de04..e7b647a 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -54,7 +54,7 @@ sub Catalogs
{
# Strip C-style comments.
- s;/\*(.|\n)*\*/;;g;
+ s;/\*(?:.|\n)*\*/;;g;
if (m;/\*;)
{
@@ -80,12 +80,12 @@ sub Catalogs
{
$catalog{natts} = $1;
}
- elsif (/^DATA\(insert(\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
+ elsif (/^DATA\(insert(?:\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
{
- check_natts($filename, $catalog{natts}, $3,
+ check_natts($filename, $catalog{natts}, $2,
$input_file, $input_line_number);
- push @{ $catalog{data} }, { oid => $2, bki_values => $3 };
+ push @{ $catalog{data} }, { oid => $1, bki_values => $2 };
}
elsif (/^DESCR\(\"(.*)\"\)$/)
{
--
2.7.4
From 92402e0306eda209b1a2811acc41325d75add0cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilmari@ilmari.org>
Date: Mon, 17 Apr 2017 14:04:20 +0100
Subject: [PATCH 2/8] Avoid repeated calls to Catalog::SplitDataLine
Both check_natts and the callers of Catalog::Catalogs were calling
Catalog::SplitDataLines, the former discarding the result.
Instead, have Catalog::Catalogs store the split fields directly and pass
the count to check_natts
---
src/backend/catalog/Catalog.pm | 14 +++++++-------
src/backend/catalog/genbki.pl | 3 +--
src/backend/utils/Gen_fmgrtab.pl | 3 +--
3 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index e7b647a..0c508b0 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -82,10 +82,12 @@ sub Catalogs
}
elsif (/^DATA\(insert(?:\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
{
- check_natts($filename, $catalog{natts}, $2,
+ my @bki_values = SplitDataLine($2);
+
+ check_natts($filename, $catalog{natts}, scalar(@bki_values),
$input_file, $input_line_number);
- push @{ $catalog{data} }, { oid => $1, bki_values => $2 };
+ push @{ $catalog{data} }, { oid => $1, bki_values => \@bki_values };
}
elsif (/^DESCR\(\"(.*)\"\)$/)
{
@@ -254,17 +256,15 @@ sub RenameTempFile
# verify the number of fields in the passed-in DATA line
sub check_natts
{
- my ($catname, $natts, $bki_val, $file, $line) = @_;
+ my ($catname, $natts, $bki_vals, $file, $line) = @_;
die "Could not find definition for Natts_${catname} before start of DATA() in $file\n"
unless defined $natts;
- my $nfields = scalar(SplitDataLine($bki_val));
-
die sprintf
"Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
- $file, $line, $natts, $nfields
- unless $natts == $nfields;
+ $file, $line, $natts, $bki_vals
+ unless $natts == $bki_vals;
}
1;
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 198e072..8875f6c 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -161,9 +161,8 @@ foreach my $catname (@{ $catalogs->{names} })
foreach my $row (@{ $catalog->{data} })
{
- # Split line into tokens without interpreting their meaning.
my %bki_values;
- @bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
+ @bki_values{@attnames} = @{$row->{bki_values}};
# Perform required substitutions on fields
foreach my $att (keys %bki_values)
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index 76bdf5c..d2c4617 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -58,9 +58,8 @@ foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
my $data = $catalogs->{pg_proc}->{data};
foreach my $row (@$data)
{
- # Split line into tokens without interpreting their meaning.
my %bki_values;
- @bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
+ @bki_values{@attnames} = @{$row->{bki_values}};
# Select out just the rows for internal-language procedures.
# Note assumption here that INTERNALlanguageId is 12.
--
2.7.4
From c8e55bd9ff36029c3f4fe7053f54f9f862c79d7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilmari@ilmari.org>
Date: Mon, 17 Apr 2017 14:47:06 +0100
Subject: [PATCH 3/8] Inline SplitDataLines into its only caller
---
src/backend/catalog/Catalog.pm | 38 ++++++++++++++------------------------
1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 0c508b0..9bd6263 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -82,12 +82,24 @@ sub Catalogs
}
elsif (/^DATA\(insert(?:\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
{
- my @bki_values = SplitDataLine($2);
+ # The field-splitting regex will clobber $1, save it
+ my $oid = $1;
+
+ # This handling of quoted strings might look too
+ # simplistic, but it matches what bootscanner.l does:
+ # that has no provision for quote marks inside quoted
+ # strings, either. If we don't have a quoted string,
+ # just snarf everything till next whitespace. That will
+ # accept some things that bootscanner.l will see as
+ # erroneous tokens; but it seems wiser to do that and
+ # let bootscanner.l complain than to silently drop
+ # non-whitespace characters.
+ my @bki_values = $2 =~ /"[^"]*"|\S+/g;
check_natts($filename, $catalog{natts}, scalar(@bki_values),
$input_file, $input_line_number);
- push @{ $catalog{data} }, { oid => $1, bki_values => \@bki_values };
+ push @{ $catalog{data} }, { oid => $oid, bki_values => \@bki_values };
}
elsif (/^DESCR\(\"(.*)\"\)$/)
{
@@ -218,28 +230,6 @@ sub Catalogs
return \%catalogs;
}
-# Split a DATA line into fields.
-# Call this on the bki_values element of a DATA item returned by Catalogs();
-# it returns a list of field values. We don't strip quoting from the fields.
-# Note: it should be safe to assign the result to a list of length equal to
-# the nominal number of catalog fields, because check_natts already checked
-# the number of fields.
-sub SplitDataLine
-{
- my $bki_values = shift;
-
- # This handling of quoted strings might look too simplistic, but it
- # matches what bootscanner.l does: that has no provision for quote marks
- # inside quoted strings, either. If we don't have a quoted string, just
- # snarf everything till next whitespace. That will accept some things
- # that bootscanner.l will see as erroneous tokens; but it seems wiser
- # to do that and let bootscanner.l complain than to silently drop
- # non-whitespace characters.
- my @result = $bki_values =~ /"[^"]*"|\S+/g;
-
- return @result;
-}
-
# Rename temporary files to final names.
# Call this function with the final file name and the .tmp extension
# Note: recommended extension is ".tmp$$", so that parallel make steps
--
2.7.4
From fc6c69a692002277378308c2f6ca019185ef9fbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilmari@ilmari.org>
Date: Mon, 17 Apr 2017 14:38:22 +0100
Subject: [PATCH 4/8] Inline check_nattrs into its only caller
---
src/backend/catalog/Catalog.pm | 24 ++++++++----------------
1 file changed, 8 insertions(+), 16 deletions(-)
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 9bd6263..0aa3e0c 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -96,8 +96,14 @@ sub Catalogs
# non-whitespace characters.
my @bki_values = $2 =~ /"[^"]*"|\S+/g;
- check_natts($filename, $catalog{natts}, scalar(@bki_values),
- $input_file, $input_line_number);
+ # Check that the DATA line matches the declared number of attributes
+ die "Could not find definition for Natts_${catname} before start of DATA() in $filename\n"
+ unless defined $catalog{natts};
+
+ die sprintf
+ "Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
+ $filename, $input_line_number, $catalog{natts}, scalar(@bki_values)
+ unless $catalog{natts} == @bki_values;
push @{ $catalog{data} }, { oid => $oid, bki_values => \@bki_values };
}
@@ -243,18 +249,4 @@ sub RenameTempFile
rename($temp_name, $final_name) || die "rename: $temp_name: $!";
}
-# verify the number of fields in the passed-in DATA line
-sub check_natts
-{
- my ($catname, $natts, $bki_vals, $file, $line) = @_;
-
- die "Could not find definition for Natts_${catname} before start of DATA() in $file\n"
- unless defined $natts;
-
- die sprintf
- "Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
- $file, $line, $natts, $bki_vals
- unless $natts == $bki_vals;
-}
-
1;
--
2.7.4
From 592dbc64d21066f5da43d535af2cb3780b95d006 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilmari@ilmari.org>
Date: Mon, 17 Apr 2017 15:57:18 +0100
Subject: [PATCH 6/8] Remove pointless Exporter usage in Catalog.pm
None of the users actually import any of the subroutines, they just call
them by their fully-qualified names.
---
src/backend/catalog/Catalog.pm | 5 -----
1 file changed, 5 deletions(-)
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index b326427..d4b4170 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -16,11 +16,6 @@ package Catalog;
use strict;
use warnings;
-require Exporter;
-our @ISA = qw(Exporter);
-our @EXPORT = ();
-our @EXPORT_OK = qw(Catalogs SplitDataLine RenameTempFile);
-
# Call this function with an array of names of header files to parse.
# Returns a nested data structure describing the data in the headers.
sub Catalogs
--
2.7.4
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
pgsql-hackers by date: