Project

General

Profile

« Previous | Next » 

Revision 10330

inputs/REMIB/Specimen/create.sql: moved filtering out of frameshifted rows to postprocess.sql, where it can happen in an idempotent DELETE. this allows filters to remove additional rows to easily be added on top of the existing filters, without needing to remake Specimen (which takes a long time, because of the many stage I derived columns that get added). the logical inversion inherent in the DELETE condition has been factored through rather than wrapped in NOT (...), because removal of frameshifted rows is more accurately specified as the detection of specific patterns that indicate frameshifting rather than the validation of all fields.

View differences:

inputs/REMIB/Specimen/create.sql
1
-- runtime: 7 s ("Time: 6888.394 ms")
2
SELECT *
3
FROM "Specimen.src"
4
WHERE
5
    acronym IS NOT NULL
6
AND coll_year ~ E'^(?:1[7-9]|20)\\d{2}$'
7
AND country !~ E'\\d'
8
AND (long_deg !~ E'[[:alpha:]]' OR long_deg IN ('RESTRINGIDO'))
1
SELECT * FROM "Specimen.src"
inputs/REMIB/Specimen/postprocess.sql
2 2

  
3 3
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$);
4 4

  
5
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym"                ON $$||:table_str||$$ (acronym               )$$);
6
SELECT create_if_not_exists($$CREATE INDEX "Specimen.coll_year"              ON $$||:table_str||$$ (coll_year             )$$);
7
SELECT create_if_not_exists($$CREATE INDEX "Specimen.country_verbatim"       ON $$||:table_str||$$ (country_verbatim      )$$);
8
SELECT create_if_not_exists($$CREATE INDEX "Specimen.longitude_deg_verbatim" ON $$||:table_str||$$ (longitude_deg_verbatim)$$);
9

  
10
-- remove frameshifted rows
11
-- rerun time: 6 s ("Time: 5537.211 ms") thanks to index
12
DELETE FROM :table
13
WHERE
14
    acronym IS NULL
15
OR  coll_year !~ E'^(?:1[7-9]|20)\\d{2}$'
16
OR  country_verbatim ~ E'\\d'
17
OR  (longitude_deg_verbatim ~ E'[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO'))
18
;
19

  
5 20
-- Remove institutions that we have direct data for
6 21
DELETE FROM :table
7 22
WHERE acronym IN (

Also available in: Unified diff