From e6ad4c0cc7c046c4e022dd7db423415bd7ee99c1 Mon Sep 17 00:00:00 2001 From: postgres Date: Tue, 28 Apr 2015 11:45:31 +0600 Subject: [PATCH 1/4] added posibility to specify order by primary key --- pg_sample | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pg_sample b/pg_sample index 7e87379..8528b1e 100755 --- a/pg_sample +++ b/pg_sample @@ -112,6 +112,16 @@ Rules are applied in order with the first match taking precedence. Randomize the rows initially selected from each table. May significantly increase the running time of the script. +=item B<--pkey-asc> + +Get the beginning of each table ordered by Primary Key. + +=item B<--pkey-desc> + +Get the ending of each table ordered by Primary Key, e.g. you can get only +fresh data from your production database. Notice than --pkey-asc and --pkey-desc +will reorder data in resulting dump file respectively. + =item B<--schema=>I The schema name to use for the sample database (defaults to _pg_sample). @@ -394,6 +404,8 @@ GetOptions(\%opt, "help|h|?|usage", "keep", "limit=s@", + "pkey-asc|pkey_asc", + "pkey-desc|pkey_asc", "random", "schema=s", "trace", @@ -532,8 +544,23 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) { last; } # warn "\n[LIMIT] $table WHERE $where $limit\n"; + my ($pkey) = $dbh->selectrow_array(qq{ + SELECT a.attname + FROM pg_index i + JOIN pg_attribute a ON a.attrelid = i.indrelid + AND a.attnum = ANY(i.indkey) + WHERE i.indrelid = '$table'::regclass + AND i.indisprimary + }); my $order = $opt{random} ? 'ORDER BY random()' : ''; + if ($pkey) { + if ($opt{'pkey-asc'}) { + $order = "ORDER BY $pkey ASC"; + } elsif ($opt{'pkey-desc'}) { + $order = "ORDER BY $pkey DESC"; + } + } $dbh->do(qq{ CREATE $unlogged TABLE $sample_table AS From 94933633879c282afa9ace0553a1dc2f17c72fec Mon Sep 17 00:00:00 2001 From: mangust404 Date: Tue, 28 Apr 2015 14:15:12 +0600 Subject: [PATCH 2/4] Subquery-friendly limit rules. ---- Move all tables affected by limit rules to the end of the sample dump because they may contains a subquery to tables in sample dump. --- pg_sample | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/pg_sample b/pg_sample index 8528b1e..cd090eb 100755 --- a/pg_sample +++ b/pg_sample @@ -107,6 +107,17 @@ pairs can also be specified as a single comma-separated value. For example: Rules are applied in order with the first match taking precedence. +Note that sample rows with foreign keys will automatically include referencing +foreign rows. + +If your tables are denormalized and don't have foreign keys then you can +use subqueries. + + # include all users where uids are in exported table "users_posts" + --limit="users = uid IN (SELECT uid FROM _pg_sample.public_users_posts)" + +Export of such tables will be postponed untill all the samples will be present. + =item B<--random> Randomize the rows initially selected from each table. May significantly @@ -509,6 +520,9 @@ notice "[limit] $_->[0] = $_->[1]\n" foreach @limits; my @tables; my %sample_tables; # real table name -> sample table name my $sth = $dbh->table_info(undef, undef, undef, 'TABLE'); +my @delayed_tables; +my @delayed_tables_ordered; + while (my $row = lower_keys($sth->fetchrow_hashref)) { next unless uc $row->{table_type} eq 'TABLE'; # skip SYSTEM TABLE values next if $row->{table_schem} eq 'information_schema'; # special pg schema @@ -520,8 +534,50 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) { or die "no pg_table or TABLE_NAME value?!"; my $table = Table->new($sname, $tname); - push @tables, $table; + + # if table match limit one of limit rule, delaying + my $has_limit_rule = 0; + foreach (@limits) { + $table->unquoted =~ /^$_->[0]$/i || $table->table =~ /^$_->[0]$/i or next; + if ($_->[1] =~ /select/i) { + $has_limit_rule = 1; + } + } + + if ($has_limit_rule > 0) { + push @delayed_tables, $table; + } else { + push @tables, $table; + } +} + +# Re-ordering delayed tables as they were specified in limit rules +foreach (@limits) { + my $limit_table = $_->[0]; + next if $limit_table ~~ /^\.\*/; + foreach(@delayed_tables) { + if ($_->table =~ /^$limit_table$/i) { + my $match_table = $_; + my $in_array = 0; + foreach(@delayed_tables_ordered) { + if($_->table =~ /^$match_table->table$/) { + $in_array++; + } + } + if (!$in_array) { + #print "\n ADDING $_ \n\n"; + push @delayed_tables_ordered, $match_table; + } + } + } +} +foreach (@delayed_tables_ordered) { + push @tables, $_; +} + +foreach (@tables) { + my $table = $_; my $sample_table = sample_table($table); $sample_tables{ $table } = $sample_table; @@ -536,7 +592,7 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) { if ($_->[1] eq '*') { # include all rows $limit = ''; } elsif ($_->[1] =~ /^\d+$/) { # numeric value turned into LIMIT - $limit = "LIMIT $_->[1]"; + $limit = "LIMIT $_->[1]"; } else { # otherwise treated as subselect $where = "($_->[1])"; } @@ -545,13 +601,13 @@ while (my $row = lower_keys($sth->fetchrow_hashref)) { } # warn "\n[LIMIT] $table WHERE $where $limit\n"; my ($pkey) = $dbh->selectrow_array(qq{ - SELECT a.attname + SELECT a.attname FROM pg_index i JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) WHERE i.indrelid = '$table'::regclass AND i.indisprimary - }); + }); my $order = $opt{random} ? 'ORDER BY random()' : ''; if ($pkey) { From b3c56a891d0b3dbf50bb42efb3025e36bb128b8d Mon Sep 17 00:00:00 2001 From: postgres Date: Tue, 28 Apr 2015 16:22:12 +0600 Subject: [PATCH 3/4] Support of nested tables --- pg_sample | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/pg_sample b/pg_sample index cd090eb..3bd3017 100755 --- a/pg_sample +++ b/pg_sample @@ -618,14 +618,42 @@ foreach (@tables) { } } - $dbh->do(qq{ - CREATE $unlogged TABLE $sample_table AS - SELECT * - FROM $table - WHERE $where - $order - $limit + my ($parent) = $dbh->selectrow_array(qq{ + SELECT p.relname AS parent + FROM pg_inherits + JOIN pg_class AS c ON (inhrelid=c.oid) + JOIN pg_class as p ON (inhparent=p.oid) + JOIN pg_namespace pn ON pn.oid = p.relnamespace + JOIN pg_namespace cn ON cn.oid = c.relnamespace + WHERE c.relname = '$table->{table}' and pn.nspname = '$table->{schema}' }); + #notice "\ntable $table parent = $parent\n"; + + if ($parent) { + notice "(as child of $parent) "; + # at first create a child table + $dbh->do(qq{ + CREATE $unlogged TABLE $sample_table () INHERITS ("$opt{schema}"."$table->{schema}_$parent") + }); + # fill it up with sample data + $dbh->do(qq{ + INSERT INTO $sample_table + SELECT * + FROM $table + WHERE $where + $order + $limit + }); + } else { + $dbh->do(qq{ + CREATE $unlogged TABLE $sample_table AS + SELECT * + FROM $table + WHERE $where + $order + $limit + }); + } if ($opt{verbose}) { my ($num_rows) = From 595442cb253555b14119408c4899c642833d0201 Mon Sep 17 00:00:00 2001 From: mangust404 Date: Wed, 29 Apr 2015 11:52:34 +0600 Subject: [PATCH 4/4] Binmode doesn't like utf-8 Added bypass to disable binmode when it's a utf-8 but it's not a solution --- pg_sample | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pg_sample b/pg_sample index 3bd3017..cc5c5c7 100755 --- a/pg_sample +++ b/pg_sample @@ -479,7 +479,7 @@ notice "Server encoding is $server_encoding\n"; $opt{encoding} ||= $server_encoding; notice "Client encoding is $opt{encoding}\n"; -binmode STDOUT, ":encoding($opt{encoding})"; +binmode STDOUT, ":encoding($opt{encoding})" if ! $opt{encoding} =~ /utf[\-]8/i; unless ($opt{'data-only'}) { notice "Exporting schema\n";