Revision 10333
Added by Aaron Marcuse-Kubitza over 11 years ago
inputs/REMIB/Specimen/postprocess.sql | ||
---|---|---|
7 | 7 |
|
8 | 8 |
-- remove frameshifted rows |
9 | 9 |
-- rerun time: 6 s ("Time: 5537.211 ms") thanks to index |
10 |
DELETE FROM :table |
|
11 |
WHERE |
|
12 |
acronym IS NULL |
|
13 |
OR coll_year !~ E'^(?:1[7-9]|20)\\d{2}$' |
|
14 |
OR country ~ E'\\d' |
|
15 |
OR (long_deg ~ E'[[:alpha:]]' AND long_deg NOT IN ('RESTRINGIDO')) |
|
16 |
; |
|
10 |
DELETE FROM :table WHERE acronym IS NULL; |
|
11 |
DELETE FROM :table WHERE coll_year !~ E'^(?:1[7-9]|20)\\d{2}$'; |
|
12 |
DELETE FROM :table WHERE country ~ E'\\d'; |
|
13 |
DELETE FROM :table WHERE (long_deg ~ E'[[:alpha:]]' AND long_deg NOT IN ('RESTRINGIDO')); |
|
17 | 14 |
|
18 | 15 |
-- Remove institutions that we have direct data for |
19 | 16 |
DELETE FROM :table |
Also available in: Unified diff
bugfix: inputs/REMIB/Specimen/postprocess.sql: remove frameshifted rows: can't OR together conditions to determine rows to delete, because if any condition is NULL instead of true/false, this will NULL out the entire WHERE condition and prevent any other true conditions from causing a deletion. the best way to fix this is to use a separate DELETE statement for each condition, so that NULLs only impact that particular condition's DELETE. unlike using a modified, NULL-insensitive OR, which would prevent the use of index scans, this allows indexes to be used for conditions that support them.