Postgres queries

Debugging

debug-print

LANGUAGE sql:

SELECT util.raise('NOTICE', '>>>'||expr);

LANGUAGE plpgsql:

PERFORM util.raise('NOTICE', '>>>'||expr);

debug-print caught exception

PERFORM util.raise('NOTICE', '>>>'||SQLERRM);

EXPLAIN from within function

requires the BIEN util schema

PERFORM util.explain2table($$
query
$$);

Programming constructs

anyelement wrapper around text function

CREATE OR REPLACE FUNCTION function(value anyelement)
  RETURNS anyelement AS
$BODY$
DECLARE
    result value%TYPE := function(value::text)::unknown;
BEGIN
    RETURN result;
END;
$BODY$
  LANGUAGE plpgsql IMMUTABLE
  COST 100;

comma operator

-- (a, b, c) -> c
(SELECT c FROM (SELECT a, b) s)
-- (a, b, void) -> void
(SELECT NULL::void FROM (SELECT
  a
, b
) s)

duplicates

SELECT column, COUNT(*) FROM table GROUP BY column HAVING COUNT(*) > 1 ORDER BY column
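
a self-contained example (the VALUES list stands in for the table):

SELECT col, COUNT(*) FROM (VALUES (1), (2), (2), (3), (3), (3)) v (col) GROUP BY col HAVING COUNT(*) > 1 ORDER BY col
-- returns (2, 2) and (3, 3)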

exception block

exception_type: see PostgreSQL Error Codes

BEGIN
    ...
EXCEPTION
WHEN exception_type THEN ...;
WHEN exception_type THEN NULL; -- suppress
WHEN   exception_type
    OR exception_type
    THEN NULL;
WHEN exception_type THEN -- selective suppress
    IF SQLERRM LIKE ... THEN NULL;
    ELSE RAISE; -- rethrow
    END IF;
WHEN OTHERS THEN ...;
END;
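
eg. a sketch that suppresses only duplicate-key errors on a particular constraint (the table and constraint names are made up):

BEGIN
    INSERT INTO parts (part_id) VALUES (1);
EXCEPTION
WHEN unique_violation THEN -- selective suppress
    IF SQLERRM LIKE '%parts_pkey%' THEN NULL;
    ELSE RAISE; -- rethrow
    END IF;
END;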

EXISTS()

SELECT NOT EXISTS( -- there is no row that is != _
SELECT NULL
FROM _
WHERE _ IS DISTINCT FROM _
LIMIT 1
);
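
eg. to check that every row of a column equals 1 (toy VALUES list; note that a NULL row counts as != 1):

SELECT NOT EXISTS( -- there is no row that is != 1
SELECT NULL
FROM (VALUES (1), (1), (NULL::integer)) v (col)
WHERE col IS DISTINCT FROM 1
LIMIT 1
);
-- returns false, because of the NULL row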

expression substitution

EXPLAIN can be used to substitute constants, rename columns, and simplify expressions. unlike the equivalent Python code for renaming and simplifying, this is fully general and uses only Postgres.

the simplify-only case (where all vars are table columns) is implemented in util.canon_sql()

eg. col_old + (const + 1), with const = 1 and col_old renamed to col_new, simplifies to col_new + 2:

-- the following code requires: renamings, constants, column types, result type

EXPLAIN
-- declare types (use WITH so original names *won't* be inlined, and to avoid `types.` prefix)
-- note that these must use the *new* column names
WITH types AS -- if WITH not supported, use temp table
(
    SELECT
      NULL::/*result type:*/integer AS result_type_null -- when using only table columns, use $$||util.typeof(expr_sql, NULL::table)||$$ for the type
    , /*col types:*/ NULL::/*col_old_type:*/integer AS col_new
    /*, (NULL::table).table_col_old AS table_col_new*/
    /*...*/
)
SELECT *
FROM
-- specify renamings and constants (use nested SELECT so they *will* be inlined)
-- with no renamings or constants, can also use just `types`
(
    SELECT
    -- with no renamings, can also use `*, constants...`
      result_type_null
    , /*renamings:*/ col_new AS col_old -- note reverse order, because we want to substitute col_new for col_old
    , /*constants:*/ 1 AS const -- to use a var from the surrounding code, use $$||util.quote_typed(var)||$$ instead of just var
    /*...*/
    FROM types
) s
WHERE (/*expr:*/ col_old + (const + 1) ) IS DISTINCT FROM result_type_null
    -- can't just use IS NOT NULL, or even `= result_type_null`, because these will simplify NULL exprs too far
;
-- then extract Filter expression from EXPLAIN output: (col_new + 2) = result_type_null
-- then extract result expression from Filter expression: col_new + 2
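
a minimal runnable check of the technique, using a scratch temp table in place of the types CTE (the table and column names here are made up):

CREATE TEMP TABLE expr_test (col_new integer);

EXPLAIN
SELECT *
FROM
(
    SELECT
      NULL::integer AS result_type_null
    , col_new AS col_old
    , 1 AS const
    FROM expr_test
) s
WHERE (col_old + (const + 1)) IS DISTINCT FROM result_type_null
;
-- the plan's Filter line contains the simplified expression, eg.
--   Filter: ((col_new + 2) IS DISTINCT FROM NULL::integer)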

for-loop

DECLARE
    loop_var text = NULL;
BEGIN
    FOR loop_var IN
        SELECT ...
    LOOP
        RETURN QUERY
            -- ...
        ;
    END LOOP;
END;
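
a self-contained sketch of the pattern in a set-returning function (the function and its args are made up):

CREATE OR REPLACE FUNCTION repeat_each(vals integer[], times integer)
  RETURNS SETOF integer AS
$BODY$
DECLARE
    val integer = NULL;
BEGIN
    FOR val IN
        SELECT unnest(vals)
    LOOP
        RETURN QUERY
            SELECT val FROM generate_series(1, times)
        ;
    END LOOP;
    RETURN;
END;
$BODY$
  LANGUAGE plpgsql IMMUTABLE;

SELECT * FROM repeat_each(ARRAY[1, 2], 3); -- 1, 1, 1, 2, 2, 2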

FULL JOIN

SELECT * FROM
(VALUES (1), (2), (4)) v1 (col)
FULL JOIN
(VALUES (1), (3), (4)) v2 (col)
ON v1.col = v2.col -- can't use USING because that includes only the left column in the result
ORDER BY COALESCE(v1.col, v2.col) -- otherwise, the v2-only rows will come after all v1 rows
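
result (the COALESCE keeps the v2-only row for 3 in sorted position):

-- col | col
--   1 |   1
--   2 |
--     |   3
--   4 |   4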

hstore a column list

unlike hstore(ROW(col1, col2)), this preserves the column names and can even apply aliases:

SELECT
(SELECT hstore(s) FROM (SELECT col1, col2 AS col2_alias, value AS col3) s) AS row_hstore
FROM table

if-statement in PL/pgSQL

IF cond THEN
    -- ...
ELSE
    -- ...
END IF;

if-statement in SQL

SELECT CASE WHEN _
THEN util.coalesce( -- perform both statements
      _::text
    , _::text
)::void
END

inlining

to test how an IMMUTABLE function will be inlined:

SELECT * FROM (VALUES (NULL)) v (c)
WHERE
func(c)
IS NOT NULL
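
eg. with a trivial IMMUTABLE sql-language function (add_one is made up), EXPLAIN the test query to see whether the call was expanded:

CREATE OR REPLACE FUNCTION add_one(value integer)
  RETURNS integer AS
$BODY$
SELECT $1 + 1
$BODY$
  LANGUAGE sql IMMUTABLE;

EXPLAIN
SELECT * FROM (VALUES (NULL::integer)) v (c) -- typed NULL so add_one(c) resolves
WHERE
add_one(c)
IS NOT NULL
;
-- if the function was inlined, the Filter shows the expanded body,
-- something like ((column1 + 1) IS NOT NULL), instead of the call add_one(c)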

in-place update

  • with UPDATE/VACUUM:
    UPDATE table SET column = expr(column);
    VACUUM ANALYZE VERBOSE table; -- needed because autovacuum sometimes doesn't run
    
  • with ALTER COLUMN TYPE USING:
    ALTER TABLE table ALTER COLUMN column TYPE curr_type USING expr(column);
    
    note that ALTER columns can't use concurrent ALTER columns' results:
    CREATE TEMP TABLE abc (a text, b text);
    INSERT INTO abc VALUES ('a', 'b');
    
    ALTER TABLE abc
      ALTER COLUMN a TYPE text USING 'x'
    , ALTER COLUMN b TYPE text USING a
    ;
    SELECT * FROM abc;
    -- "x";"a" not "x";"x" as expected
    

is more complete than

the operator contains ? to indicate that you are comparing the NULLity of the values

CREATE OR REPLACE FUNCTION is_more_complete_than("left" anyelement, "right" anyelement)
  RETURNS boolean AS
$BODY$
SELECT $1 IS NOT DISTINCT FROM $2 OR ($1 IS NOT NULL AND $2 IS NULL)
$BODY$
  LANGUAGE sql IMMUTABLE
  COST 100;

CREATE OPERATOR ?>=(
  PROCEDURE = is_more_complete_than,
  LEFTARG = anyelement,
  RIGHTARG = anyelement);
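
usage (both operands must have the same type, since the args are anyelement):

SELECT 1 ?>= NULL::integer; -- true: a non-NULL value is at least as complete as NULL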

is populated more often than

the operator contains ? to indicate that you are comparing the NULLity of the values, and * to indicate a fuzzy match (similar to ~* indicating case-insensitive regexp matching)

CREATE OR REPLACE FUNCTION is_populated_more_often_than("left" anyelement, "right" anyelement)
  RETURNS boolean AS
$BODY$
SELECT ($1 IS NOT NULL) >= ($2 IS NOT NULL) -- true > false (parenthesized so the IS NOT NULL tests bind before >=)
$BODY$
  LANGUAGE sql IMMUTABLE
  COST 100;

CREATE OPERATOR ?*>=(
  PROCEDURE = is_populated_more_often_than,
  LEFTARG = anyelement,
  RIGHTARG = anyelement);
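
usage:

SELECT NULL::integer ?*>= 1; -- false: NULL is not populated at least as often as a non-NULL value
SELECT 1 ?*>= 1; -- true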

maintaining derived-column formulas across column renames

  1. use a CHECK constraint, which will be auto-updated with column renames:
    -- the following code requires: table, col, expr
    
    SELECT util.derived_col_update((('table', 'col'), $$expr$$));
    -- if table has data:
    SELECT util.derived_cols_populate('table');
    
  2. whenever columns are renamed, update the trigger function to use the CHECK constraint's updated formula:
    SELECT util.derived_cols_update('table');
    

nested transaction

should not be necessary, because an error will always abort and roll back all transactions that don't catch it (including any single-command autocommit transaction). because of this, a nested transaction is not automatically created for each function.

BEGIN -- nested transaction
    ...
EXCEPTION
WHEN SQLSTATE /*successful_completion*/'00000' THEN NULL; -- need at least one WHEN clause
END;

query plan fixing

use these commands if a query plan is incorrect:

-- uncomment additional options as needed
SET enable_seqscan = off;
SET join_collapse_limit = 1; -- turn it off
--SET enable_sort = off;
--SET enable_mergejoin = off;
--SET enable_hashjoin = off;
/*query*/
RESET enable_hashjoin;
RESET enable_mergejoin;
RESET enable_sort;
RESET join_collapse_limit;
RESET enable_seqscan;

sandbox block

BEGIN
    __

    -- roll back DB changes
    PERFORM util.raise('ROLBK', 'sandbox block finished and rolled back');
EXCEPTION
WHEN SQLSTATE 'ROLBK' THEN NULL;
END;
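
without the BIEN util schema, the same pattern can be written with plain RAISE (a sketch; the DO wrapper just makes it runnable standalone):

DO $$
BEGIN
    -- DB changes to test go here

    -- roll back DB changes
    RAISE EXCEPTION SQLSTATE 'ROLBK' USING MESSAGE = 'sandbox block finished and rolled back';
EXCEPTION
WHEN SQLSTATE 'ROLBK' THEN NULL;
END$$;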

vars for use in SELECT exprs

SELECT expr(var)
FROM (SELECT _ AS var) s
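
eg. to avoid repeating a computed value:

SELECT var * 2 AS doubled, var * var AS squared
FROM (SELECT 7 AS var) s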

void-returning sql-language functions

SELECT func(_)
FROM inputs
;
SELECT NULL::void; -- don't fold away functions called in previous query

Profiling

function in for loop

SELECT function(v) FROM unnest(array_fill(arg_value, array[iter_ct])) f (v)
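
eg. to profile md5() over 100000 iterations (count() just keeps the output small; \timing is a psql command):

\timing on
SELECT count(md5(v)) FROM unnest(array_fill('test'::text, array[100000])) f (v);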

DB structure

Query columns for pgAdmin export

Need at least one data row for pgAdmin to enable CSV export

SELECT *
FROM
(
    SELECT *, NULL::text AS _
    FROM <query>
    LIMIT 0
) q
RIGHT JOIN (VALUES (NULL)) v (_) USING (_)

Array starting index

SELECT array_lower(field, 1) FROM table WHERE field IS NOT NULL LIMIT 1

pg_temp schemas in use

  1. query:
    SELECT ('"'||schemaname||'"."'||tablename||'"')::regclass AS qual_table
    FROM pg_tables
    WHERE NOT schemaname = ANY (ARRAY[]::text[])
    
  2. If you get an error of the form:
    ERROR:  permission denied for schema pg_temp_#
    
    add this schema inside ARRAY[] above and re-run the query until there are no more errors
  3. The contents of ARRAY[] above are the pg_temp schemas in use

Adding covering indexes on foreign keys

  1. Run the following query (modified from Postgresql: Indexes on Foreign Keys):
    SELECT conrelid::regclass
             ,conname
             ,reltuples::bigint
        FROM pg_constraint
             JOIN pg_class ON (conrelid = pg_class.oid)
       WHERE contype = 'f'
             AND NOT EXISTS (
               SELECT 1
                 FROM pg_index
                WHERE indrelid = conrelid
                      AND conkey[1] = indkey[0] AND indpred IS NULL
             )
             AND reltuples > 0 -- limit results to only tables in use
    ORDER BY reltuples DESC
    ;
    
  2. For each result, add a covering index: CREATE INDEX ON table (field);

SQL_ASCII

CREATE DATABASE database WITH OWNER owner TEMPLATE template0 ENCODING 'SQL_ASCII' LC_COLLATE 'C' LC_CTYPE 'C';

UTF8 takes about 1/3 longer than SQL_ASCII:
129min UTF8 / (5929.75s * 1min/60s = 98.8min) SQL_ASCII = 1.3, ie. roughly 1/3 longer for UTF8

  • UTF8:
    Read 53310244 row(s)
    6889.27user 225.13system 2:09:04elapsed 91%CPU (0avgtext+0avgdata 51824maxresident)k
    0inputs+0outputs (0major+7311minor)pagefaults 0swaps
    
  • SQL_ASCII:
    (after CREATE DATABASE ... SQL_ASCII above)
    rm=1 t ./inputs/GBIF/raw_occurrence_record/run load_data 
         7309.16 real      4581.97 user        32.80 sys
    Read 53310244 row(s)
         5929.75 real      4580.94 user        30.81 sys
    

Table stats

Highest value of integer index column

  • also a fast way to get count(*) using a row number column
SELECT row_num
FROM joins
ORDER BY row_num DESC
LIMIT 1

DDL

Change fkeys to ON DELETE CASCADE on live DB

  1. in DDL export, join each ALTER TABLE ... ADD CONSTRAINT onto a single line: replace text (including the preceding newline and indentation)
    
        ADD CONSTRAINT
    
    with
     ADD CONSTRAINT
    
  2. grep -F "FOREIGN KEY" schema_export.sql >changes.sql
  3. In changes.sql, replace regexp
    ^ALTER TABLE ONLY (\w+) ADD CONSTRAINT (\w+) FOREIGN KEY \((\w+)\) REFERENCES .*(?=;$)
    
    with
    
    ALTER TABLE $1 DROP CONSTRAINT $2;
    $0 ON UPDATE CASCADE ON DELETE CASCADE
    

make COMMENTs start on their own line

makes sure every COMMENT starts and ends on its own line, so that it appears correctly in the formats it's most likely to be read in (ie. in the DDL export, not the COMMENT edit box in pgAdmin)

in DDL export:

  1. replace regexp
    ^(COMMENT .*? IS '(?!$))((?s).*?';)$
    
    with
    $1
    $2
    
  2. replace regexp
    ^(COMMENT .*? IS '(?s:.*?(.)))(';)$
    
    with BeanShell snippet
    _2.equals("\n") ? _0 : _1+"\n"+_3
    

wrap function call in nested SELECT

needed when Postgres incorrectly does not constant-fold (inline) a function, leading to a slowdown if the function is therefore run many times

WARNING: not idempotent
WARNING: does not handle nested () correctly. these need to be manually reviewed and fixed.

replace vars below: [function], [schema]

  1. replace regexp
    \b(?:[schema]\.)?[function]\(\S*?\)
    
    with
    (SELECT $0)
    
  2. search for text
    [function]
    
    1. for each occurrence, check that nested () have been handled correctly

Data import

INSERT/ON DUPLICATE KEY SELECT

COPY FROM column list verification

note that this approach does not require a separate programming language to parse the CSV header row, as is currently done

  1. COPY FROM just the header row into a temp table of text columns, whose names are the same as the columns of the table to insert into
  2. in the temp table's first row, check that the values match the column names:
    1. get an array of the temp table's column names
    2. create a SQL statement that makes an array of each column in the temp table, using the column names list above:
      SELECT ARRAY[col1::text, col2::text, ...] FROM table
      
    3. run this SQL statement to produce a text array
    4. compare the two arrays
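
a sketch of steps 2.1-2.4 in PL/pgSQL, assuming the header row has already been COPYed into a temp table named header_row (step 1):

DO $$
DECLARE
    col_names text[];
    header_values text[];
BEGIN
    -- 2.1: the temp table's column names, in order
    SELECT array_agg(attname::text ORDER BY attnum) INTO col_names
    FROM pg_attribute
    WHERE attrelid = 'pg_temp.header_row'::regclass AND attnum > 0 AND NOT attisdropped;
    -- 2.2/2.3: build and run a statement that makes an array of the first row's values
    EXECUTE 'SELECT ARRAY['||(
        SELECT string_agg(quote_ident(attname)||'::text', ', ' ORDER BY attnum)
        FROM pg_attribute
        WHERE attrelid = 'pg_temp.header_row'::regclass AND attnum > 0 AND NOT attisdropped
    )||'] FROM pg_temp.header_row' INTO header_values;
    -- 2.4: compare the two arrays
    IF header_values IS DISTINCT FROM col_names THEN
        RAISE 'CSV header % does not match the column names %', header_values, col_names;
    END IF;
END$$;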

Data maintenance

merging pkeys which are referenced by fkeys

to merge pkey 2 into pkey 1:

  1. defer the pkey constraint
  2. rename pkey 1 to pkey 2
    • all fkeys of both pkeys will now use pkey 2
    • there are now two copies of pkey 2 in the main table
  3. rename one of the copies of pkey 2 to pkey 1, restricting the UPDATE to a single copy (eg. via a ctid subquery with LIMIT 1)
    • all fkeys will now use pkey 1, because the UPDATE cascades even though it is applied to only one copy of pkey 2
    • the pkey 2 entry now has no fkeys pointing to it
  4. delete pkey 2
    • because there are no fkeys pointing to it, the delete will not cascade
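
a toy walk-through of the steps above (the tables and key values are made up):

CREATE TEMP TABLE main (id integer PRIMARY KEY DEFERRABLE);
CREATE TEMP TABLE child (main_id integer REFERENCES main (id) ON UPDATE CASCADE);
INSERT INTO main VALUES (1), (2);
INSERT INTO child VALUES (1), (2);

BEGIN;
SET CONSTRAINTS ALL DEFERRED; -- 1. defer the pkey constraint
UPDATE main SET id = 2 WHERE id = 1; -- 2. rename pkey 1 to pkey 2 (cascades to child)
UPDATE main SET id = 1
WHERE ctid = (SELECT ctid FROM main WHERE id = 2 LIMIT 1); -- 3. rename one copy back to pkey 1 (also cascades)
DELETE FROM main WHERE id = 2; -- 4. delete pkey 2 (nothing references it anymore)
COMMIT;

SELECT * FROM child; -- both rows now reference pkey 1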

DB info

Data directory path

SHOW data_directory;

log file searching

sudo -s
grep -hE 'pattern' /var/lib/postgresql/9.*/main/pg_log/postgresql-[date]-*

datatype support for hash indexes

CREATE TEMP TABLE t (col datatype);
CREATE INDEX ON t USING hash (col);
-- if error, no hash support for that datatype
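
eg. point has no equality operator, so no hash support:

CREATE TEMP TABLE t (col point);
CREATE INDEX ON t USING hash (col);
-- ERROR:  data type point has no default operator class for access method "hash"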

Disk usage

Total disk usage

in Ubuntu:

(cd /var/lib/postgresql/9.*/; sudo du --summarize --human-readable main/base/)

temporary disk usage

the temporary space mechanism is Postgres's alternative to using the system swap space, which usually has a system limit that's less than the available disk space

in Ubuntu:

Postgres 9.3

WARNING: this also includes permanent disk space, in addition to temporary space used by a disk-intensive operation

note that listing just pgsql_tmp is no longer sufficient to see all the temporary space, because a lot of it is now stored in the database's base/ subfolder

(cd /var/lib/postgresql/9.*/; sudo du --max-depth=1 --all --human-readable main/base/pgsql_tmp/) # temporary space for just temp tables
(cd /var/lib/postgresql/9.*/; sudo du --max-depth=1 --all --human-readable main/base/*/) # temporary space for permanent tables (as well as permanent space)

Postgres 9.1

(cd /var/lib/postgresql/9.*/; sudo du --max-depth=1 --all --human-readable main/base/pgsql_tmp/) # temporary space for temp tables, and possibly also permanent tables

Schema sizes

SELECT
schema
, pg_size_pretty(total) AS total
, pg_size_pretty(relation) AS relation
, pg_size_pretty(indexes) AS indexes
FROM
(
    SELECT
    schema
    , sum(pg_total_relation_size(qual_table))::bigint AS total
    , sum(pg_relation_size(qual_table))::bigint AS relation
    , sum(pg_indexes_size(qual_table))::bigint AS indexes
    FROM
    (
        SELECT
        schemaname AS schema
        , tablename AS table
        , ('"'||schemaname||'"."'||tablename||'"')::regclass AS qual_table
        FROM pg_tables
        WHERE schemaname NOT LIKE 'pg_%'
    ) s
    GROUP BY schema
    ORDER BY total DESC
) s

Index sizes

SELECT
qual_index
, is_unique
, pg_size_pretty(size) AS size
FROM
(
    SELECT
    qual_index
    , is_unique
    , pg_relation_size(qual_index)::bigint AS size
    FROM
    (
        SELECT
        ('"'||nspname||'"."'||relname||'"')::regclass AS qual_index
        , indisunique as is_unique
        FROM pg_index
        JOIN pg_class index ON index.oid = indexrelid
        JOIN pg_namespace ON pg_namespace.oid = relnamespace
        WHERE nspname NOT LIKE 'pg_%'
    ) s
    ORDER BY size DESC
) s

Non-unique index sizes

SELECT
qual_index
, pg_size_pretty(size) AS size
FROM
(
    SELECT
    qual_index
    , pg_relation_size(qual_index)::bigint AS size
    FROM
    (
        SELECT
        ('"'||nspname||'"."'||relname||'"')::regclass AS qual_index
        FROM pg_index
        JOIN pg_class index ON index.oid = indexrelid
        JOIN pg_namespace ON pg_namespace.oid = relnamespace
        WHERE
        nspname NOT LIKE 'pg_%'
        AND NOT indisunique
    ) s
    ORDER BY size DESC
) s