- Table of contents
- Postgres queries
- Debugging
- Programming constructs
- anyelement wrapper around text function
- comma operator
- duplicates
- exception block
- EXISTS()
- expression substitution
- for-loop
- FULL JOIN
- hstore a column list
- if-statement in PL/pgSQL
- if-statement in SQL
- inlining
- in-place update
- is more complete than
- is populated more often than
- maintaining derived-column formulas across column renames
- nested transaction
- query plan fixing
- sandbox block
- vars for use in SELECT exprs
- void-returning sql-language functions
- Profiling
- DB structure
- Table stats
- DDL
- Data import
- Data maintenance
- DB info
- Disk usage
Postgres queries¶
Debugging¶
debug-print¶
LANGUAGE sql
:¶
SELECT util.raise('NOTICE', '>>>'||expr);
LANGUAGE plpgsql
:¶
PERFORM util.raise('NOTICE', '>>>'||expr);
debug-print caught exception¶
PERFORM util.raise('NOTICE', '>>>'||SQLERRM);
EXPLAIN from within function¶
requires the BIEN util
schema
PERFORM util.explain2table($$
query
$$);
Programming constructs¶
anyelement wrapper around text function¶
CREATE OR REPLACE FUNCTION function(value anyelement)
RETURNS anyelement AS
$BODY$
DECLARE
result value%TYPE := function(value::text)::unknown;
BEGIN
RETURN result;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 100;
comma operator¶
-- (a, b, c) -> c
(SELECT c FROM (SELECT a, b) s)
-- (a, b, void) -> void
(SELECT NULL::void FROM (SELECT
a
,
b
) s)
duplicates¶
SELECT column, COUNT(*) FROM table GROUP BY column HAVING COUNT(*) > 1 ORDER BY column
exception block¶
exception_type
: see PostgreSQL Error Codes
BEGIN
...
EXCEPTION
WHEN exception_type THEN ...;
WHEN exception_type THEN NULL; -- suppress
WHEN exception_type
OR exception_type
THEN NULL;
WHEN exception_type THEN -- selective suppress
IF SQLERRM LIKE ... THEN NULL;
ELSE RAISE; -- rethrow
END IF;
WHEN OTHERS THEN ...;
END;
EXISTS()
¶
SELECT NOT EXISTS( -- there is no row that is != _
SELECT NULL
FROM _
WHERE _ IS DISTINCT FROM _
LIMIT 1
);
expression substitution¶
EXPLAIN
can be used to substitute constants, rename columns, and simplify expressions. unlike the equivalent Python code for renaming and simplifying, this is fully general and uses only Postgres.
the simplify-only case (with all vars table cols) is implemented in util.canon_sql()
eg. col_old + (const + 1)
, with const = 1
and col_old
renamed to col_new
, simplifies to col_new + 2
:
-- the following code requires: renamings, constants, column types, result type
EXPLAIN
-- declare types (use WITH so original names *won't* be inlined, and to avoid `types.` prefix)
-- note that these must use the *new* column names
WITH types AS -- if WITH not supported, use temp table
(
SELECT
NULL::/*result type:*/integer AS result_type_null -- when using only table columns, use $$||util.typeof(expr_sql, NULL::table)||$$ for the type
, /*col types:*/ NULL::/*col_old_type:*/integer AS col_new
/*, (NULL::table).table_col_old AS table_col_new*/
/*...*/
)
SELECT *
FROM
-- specify renamings and constants (use nested SELECT so they *will* be inlined)
-- with no renamings or constants, can also use just `types`
(
SELECT
-- with no renamings, can also use `*, constants...`
result_type_null
, /*renamings:*/ col_new AS col_old -- note reverse order, because we want to substitute col_new for col_old
, /*constants:*/ 1 AS const -- to use a var from the surrounding code, use $$||util.quote_typed(var)||$$ instead of just var
/*...*/
FROM types
) s
WHERE (/*expr:*/ col_old + (const + 1) ) IS DISTINCT FROM result_type_null
-- can't just use IS NOT NULL, or even `= result_type_null`, because these will simplify NULL exprs too far
;
-- then extract Filter expression from EXPLAIN output: (col_new + 2) = result_type_null
-- then extract result expression from Filter expression: col_new + 2
for-loop¶
DECLARE
loop_var text = NULL;
BEGIN
FOR loop_var IN
SELECT ...
LOOP
RETURN QUERY
-- ...
;
END LOOP;
END;
FULL JOIN¶
SELECT * FROM
(VALUES (1), (2), (4)) v1 (col)
FULL JOIN
(VALUES (1), (3), (4)) v2 (col)
ON v1.col = v2.col -- can't use USING because that includes only the left column in the result
ORDER BY COALESCE(v1.col, v2.col) -- otherwise, the v2-only rows will come after all v1 rows
hstore
a column list¶
unlike hstore(ROW(col1, col2))
, this preserves the column names and can even apply aliases:
SELECT
(SELECT hstore(s) FROM (SELECT col1, col2 AS col2_alias, value AS col3) s) AS row_hstore
FROM table
if-statement in PL/pgSQL¶
IF cond THEN
-- ...
ELSE
-- ...
END IF;
if-statement in SQL¶
SELECT CASE WHEN _
THEN util.coalesce( -- perform both statements
_::text
, _::text
)::void
END
inlining¶
to test how an IMMUTABLE
function will be inlined:
SELECT * FROM (VALUES (NULL)) v (c)
WHERE
func(c)
IS NOT NULL
in-place update¶
- with
UPDATE
/VACUUM
:UPDATE table SET column = expr(column); VACUUM ANALYZE VERBOSE table; -- needed because autovacuum sometimes doesn't run
- with
ALTER COLUMN TYPE USING
:
note that ALTER columns can't use concurrent ALTER columns' results:ALTER TABLE table ALTER COLUMN column TYPE curr_type USING expr(column);
CREATE TEMP TABLE abc (a text, b text); INSERT INTO abc VALUES ('a', 'b'); ALTER TABLE abc ALTER COLUMN a TYPE text USING 'x' , ALTER COLUMN b TYPE text USING a ; SELECT * FROM abc; -- "x";"a" not "x";"x" as expected
is more complete than¶
the operator contains ?
to indicate that you are comparing the NULLity of the values
CREATE OR REPLACE FUNCTION is_more_complete_than("left" anyelement, "right" anyelement)
RETURNS boolean AS
$BODY$
SELECT $1 IS NOT DISTINCT FROM $2 OR ($1 IS NOT NULL AND $2 IS NULL)
$BODY$
LANGUAGE sql IMMUTABLE
COST 100;
CREATE OPERATOR ?>=(
PROCEDURE = is_more_complete_than,
LEFTARG = anyelement,
RIGHTARG = anyelement);
is populated more often than¶
the operator contains ?
to indicate that you are comparing the NULLity of the values, and *
to indicate a fuzzy match (similar to ~*
indicating case-insensitive regexp matching)
CREATE OR REPLACE FUNCTION is_populated_more_often_than("left" anyelement, "right" anyelement)
RETURNS boolean AS
$BODY$
SELECT $1 IS NOT NULL >= $2 IS NOT NULL -- true > false
$BODY$
LANGUAGE sql IMMUTABLE
COST 100;
CREATE OPERATOR ?*>=(
PROCEDURE = is_populated_more_often_than,
LEFTARG = anyelement,
RIGHTARG = anyelement);
maintaining derived-column formulas across column renames¶
- use a
CHECK
constraint, which will be auto-updated with column renames:-- the following code requires: table, col, expr SELECT util.derived_col_update((('table', 'col'), $$expr$$)); -- if table has data: SELECT util.derived_cols_populate('table');
- whenever columns are renamed, update the trigger function to use the
CHECK
constraint's updated formula:SELECT util.derived_cols_update('table');
nested transaction¶
should not be necessary, because an error will always abort and roll back all transactions that don't catch it (including any single-command autocommit transaction). because of this, a nested transaction is not automatically created for each function.
BEGIN -- nested transaction
...
EXCEPTION
WHEN SQLSTATE /*successful_completion*/'00000' THEN NULL; -- need at least one WHEN clause
END;
query plan fixing¶
use these commands if a query plan is incorrect:
-- uncomment additional options as needed
SET enable_seqscan = off;
SET join_collapse_limit = 1; -- turn it off
--SET enable_sort = off;
--SET enable_mergejoin = off;
--SET enable_hashjoin = off;
/*query*/
RESET enable_hashjoin;
RESET enable_mergejoin;
RESET enable_sort;
RESET join_collapse_limit;
RESET enable_seqscan;
sandbox block¶
BEGIN
__
-- roll back DB changes
PERFORM util.raise('ROLBK', 'sandbox block finished and rolled back');
EXCEPTION
WHEN SQLSTATE 'ROLBK' THEN NULL;
END;
vars for use in SELECT exprs¶
SELECT expr(var)
FROM (SELECT _ AS var) s
void
-returning sql
-language functions¶
SELECT func(_)
FROM inputs
;
SELECT NULL::void; -- don't fold away functions called in previous query
Profiling¶
function in for loop¶
SELECT function(v) FROM unnest(array_fill(arg_value, array[iter_ct])) f (v)
DB structure¶
Query columns for pgAdmin export¶
Need at least one data row for pgAdmin to enable CSV export
SELECT *
FROM
(
SELECT *, NULL::text AS _
FROM <query>
LIMIT 0
) q
RIGHT JOIN (VALUES (NULL)) v (_) USING (_)
Array starting index¶
SELECT array_lower(field, 1) FROM table WHERE field IS NOT NULL LIMIT 1
pg_temp
schemas in use¶
- query:
SELECT ('"'||schemaname||'"."'||tablename||'"')::regclass AS qual_table FROM pg_tables WHERE NOT schemaname = ANY (ARRAY[]::text[])
- If you get an error of the form:
ERROR: permission denied for schema pg_temp_#
add this schema insideARRAY[]
above and re-run the query until there are no more errors - The contents of
ARRAY[]
above are the pg_temp schemas in use
Adding covering indexes on foreign keys¶
- Run the following query (modified from Postgresql: Indexes on Foreign Keys):
SELECT conrelid::regclass ,conname ,reltuples::bigint FROM pg_constraint JOIN pg_class ON (conrelid = pg_class.oid) WHERE contype = 'f' AND NOT EXISTS ( SELECT 1 FROM pg_index WHERE indrelid = conrelid AND conkey[1] = indkey[0] AND indpred IS NULL ) AND reltuples > 0 -- limit results to only tables in use ORDER BY reltuples DESC ;
- For each result, add a covering index:
CREATE INDEX ON table (field);
SQL_ASCII
¶
CREATE DATABASE database WITH OWNER owner TEMPLATE template0 ENCODING 'SQL_ASCII' LC_COLLATE 'C' LC_CTYPE 'C';
UTF8
takes 1/3 longer than it:
129min UTF8 / (5929.75s * 1min/60s) SQL_ASCII = 1.33 = 1/3 longer for UTF8
UTF8
:Read 53310244 row(s) 6889.27user 225.13system 2:09:04elapsed 91%CPU (0avgtext+0avgdata 51824maxresident)k 0inputs+0outputs (0major+7311minor)pagefaults 0swaps
SQL_ASCII
:
(afterCREATE DATABASE ... SQL_ASCII
above)rm=1 t ./inputs/GBIF/raw_occurrence_record/run load_data 7309.16 real 4581.97 user 32.80 sys Read 53310244 row(s) 5929.75 real 4580.94 user 30.81 sys
Table stats¶
Highest value of integer index column¶
- also a fast way to get
count(*)
using a row number column
SELECT row_num
FROM joins
ORDER BY row_num DESC
LIMIT 1
DDL¶
Change fkeys to ON DELETE CASCADE on live DB¶
- in DDL export, replace text
withADD CONSTRAINT
ADD CONSTRAINT
grep -F "FOREIGN KEY" schema_export.sql >changes.sql
- In
changes.sql
, replace regexp
with^ALTER TABLE ONLY (\w+) ADD CONSTRAINT (\w+) FOREIGN KEY \((\w+)\) REFERENCES .*(?=;$)
ALTER TABLE $1 DROP CONSTRAINT $2; $0 ON UPDATE CASCADE ON DELETE CASCADE
make COMMENTs
start on their own line¶
makes sure every COMMENT
starts and ends on its own line, so that it appears correctly in the formats it's most likely to be read in (ie. in the DDL export, not the COMMENT
edit box in pgAdmin)
in DDL export:
- replace regexp
with^(COMMENT .*? IS '(?!$))((?s).*?';)$
$1 $2
- replace regexp
^(COMMENT .*? IS '(?s:.*?(.)))(';)$
with BeanShell snippet_2.equals("\n") ? _0 : _1+"\n"+_3
wrap function call in nested SELECT¶
needed when Postgres incorrectly does not constant-fold (inline) a function, leading to a slowdown if the function is therefore run many times
WARNING: not idempotent
WARNING: does not handle nested ()
correctly. these need to be manually reviewed and fixed.
replace vars below: [function]
, [schema]
- replace regexp
\b(?:[schema]\.)?[function]\(\S*?\)
with(SELECT $0)
- search for text
[function]
- for each occurrence, check that nested
()
have been handled correctly
- for each occurrence, check that nested
Data import¶
INSERT/ON DUPLICATE KEY SELECT¶
COPY FROM
column list verification¶
note that this approach does not require a separate programming language to do the CSV parsing of the header row, as we currently do it
COPY FROM
just the header row into a temp table of text columns, whose names are the same as the columns of the table to insert into- in the temp table's first row, check that the values match the column names:
- get an array of the temp table's column names
- create a SQL statement that makes an array of each column in the temp table, using the column names list above:
SELECT ARRAY[col1::text, col2::text, ...] FROM table
- run this SQL statement to produce a text array
- compare the two arrays
Data maintenance¶
merging pkeys which are referenced by fkeys¶
to merge pkey 2 into pkey 1:
- defer the pkey constraint
- rename pkey 1 to pkey 2
- all fkeys of both pkeys will now use pkey 2
- there are now two copies of pkey 2 in the main table
- rename one of the copies of pkey 2 to pkey 1, using
LIMIT 1
on theUPDATE
(or thectid
, althoughLIMIT 1
works just fine)- all fkeys will now use pkey 1, because the
UPDATE
cascades even though it is applied to only one copy of pkey 2 - the pkey 2 entry now has no fkeys pointing to it
- all fkeys will now use pkey 1, because the
- delete pkey 2
- because there are no fkeys pointing to it, the delete will not cascade
DB info¶
Data directory path¶
SHOW data_directory;
log file searching¶
sudo -s grep -hE 'pattern' /var/lib/postgresql/9.*/main/pg_log/postgresql-[date]-*
datatype support for hash indexes¶
CREATE TEMP TABLE t (col datatype);
CREATE INDEX ON t USING hash (col);
-- if error, no hash support for that datatype
Disk usage¶
Total disk usage¶
in Ubuntu:
(cd /var/lib/postgresql/9.*/; sudo du --summarize --human-readable main/base/)
temporary disk usage¶
the temporary space mechanism is Postgres's alternative to using the system swap space, which usually has a system limit that's less than the available disk space
in Ubuntu:
Postgres 9.3¶
WARNING: this also includes permanent disk space, in addition to temporary space used by a disk-intensive operation
note that listing just pgsql_tmp
is no longer sufficient to see all the temporary space, because a lot of it is now stored in the database's base/
subfolder
(cd /var/lib/postgresql/9.*/; sudo du --max-depth=1 --all --human-readable main/base/pgsql_tmp/) # temporary space for just temp tables (cd /var/lib/postgresql/9.*/; sudo du --max-depth=1 --all --human-readable main/base/*/) # temporary space for permanent tables (as well as permanent space)
Postgres 9.1¶
(cd /var/lib/postgresql/9.*/; sudo du --max-depth=1 --all --human-readable main/base/pgsql_tmp/) # temporary space for temp tables, and possibly also permanent tables
Schema sizes¶
SELECT
schema
, pg_size_pretty(total) AS total
, pg_size_pretty(relation) AS relation
, pg_size_pretty(indexes) AS indexes
FROM
(
SELECT
schema
, sum(pg_total_relation_size(qual_table))::bigint AS total
, sum(pg_relation_size(qual_table))::bigint AS relation
, sum(pg_indexes_size(qual_table))::bigint AS indexes
FROM
(
SELECT
schemaname AS schema
, tablename AS table
, ('"'||schemaname||'"."'||tablename||'"')::regclass AS qual_table
FROM pg_tables
WHERE schemaname NOT LIKE 'pg_%'
) s
GROUP BY schema
ORDER BY total DESC
) s
Index sizes¶
SELECT
qual_index
, is_unique
, pg_size_pretty(size) AS size
FROM
(
SELECT
qual_index
, is_unique
, pg_relation_size(qual_index)::bigint AS size
FROM
(
SELECT
('"'||nspname||'"."'||relname||'"')::regclass AS qual_index
, indisunique as is_unique
FROM pg_index
JOIN pg_class index ON index.oid = indexrelid
JOIN pg_namespace ON pg_namespace.oid = relnamespace
WHERE nspname NOT LIKE 'pg_%'
) s
ORDER BY size DESC
) s
Non-unique index sizes¶
SELECT
qual_index
, pg_size_pretty(size) AS size
FROM
(
SELECT
qual_index
, pg_relation_size(qual_index)::bigint AS size
FROM
(
SELECT
('"'||nspname||'"."'||relname||'"')::regclass AS qual_index
FROM pg_index
JOIN pg_class index ON index.oid = indexrelid
JOIN pg_namespace ON pg_namespace.oid = relnamespace
WHERE
nspname NOT LIKE 'pg_%'
AND NOT indisunique
) s
ORDER BY size DESC
) s