Project

General

Profile

« Previous | Next » 

Revision 12924

validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: updated to DB, which pg_dump-formats the views

View differences:

trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql
7 7
SET search_path TO "NY";
8 8

  
9 9
CREATE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS
10
SELECT COUNT(*) "totalSpecimenRecords"
11
FROM "Ecatalog_all";
10
 SELECT count(*) AS "totalSpecimenRecords"
11
   FROM "Ecatalog_all";
12 12
COMMENT ON VIEW _specimens_01_count_of_total_records_specimens_in_source_db IS '
13 13
Check: should return 1 row
14 14
';
15 15

  
16 16
CREATE VIEW _specimens_02_count_of_unique_verbatim_families AS
17
SELECT COUNT(DISTINCT family) AS families FROM "Ecatalog_all" WHERE family IS NOT NULL;
17
 SELECT count(DISTINCT "Ecatalog_all".family) AS families
18
   FROM "Ecatalog_all"
19
  WHERE ("Ecatalog_all".family IS NOT NULL);
18 20
COMMENT ON VIEW _specimens_02_count_of_unique_verbatim_families IS '
19 21
Check: should return 1 row
20 22
';
21 23

  
22 24
CREATE VIEW _specimens_03_list_of_verbatim_families AS
23
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
24
FROM "Ecatalog_all"
25
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
25
 SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
26
   FROM "Ecatalog_all"
27
  WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
26 28
COMMENT ON VIEW _specimens_03_list_of_verbatim_families IS '
27 29
Check: should return same number of rows
28 30
';
29 31

  
30 32
CREATE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS
31
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet")) AS subspecies FROM "Ecatalog_all"
32
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL;
33
 SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet")) AS subspecies
34
   FROM "Ecatalog_all"
35
  WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL));
33 36
COMMENT ON VIEW _specimens_04_count_of_unique_verbatim_species_without_author IS '
34 37
Check: should return 1 row
35 38
';
36 39

  
37 40
CREATE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS
38
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS subspecies FROM "Ecatalog_all"
39
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL;
41
 SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS subspecies
42
   FROM "Ecatalog_all"
43
  WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL));
40 44
COMMENT ON VIEW _specimens_05_list_of_verbatim_species_excluding_author IS '
41 45
Check: should return 3335 rows
42 46
';
43 47

  
44 48
CREATE VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author AS
45
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet", subspecies)) AS "specificEpithet" FROM "Ecatalog_all"
46
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
49
 SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet", "Ecatalog_all".subspecies)) AS "specificEpithet"
50
   FROM "Ecatalog_all"
51
  WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
47 52
COMMENT ON VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author IS '
48 53
Check: should return 1 row
49 54
';
50 55

  
51 56
CREATE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS
52
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
53
FROM "Ecatalog_all"
54
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
57
 SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
58
   FROM "Ecatalog_all"
59
  WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
55 60
COMMENT ON VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author IS '
56 61
Check: should return 40145 rows
57 62
';
58 63

  
59 64
CREATE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS
60
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ',
61
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))),
62
TRIM(IFNULL("scientificNameAuthorship",''))
63
))) AS "fullScientificNames"
64
FROM "Ecatalog_all"
65
WHERE genus IS NOT NULL;
65
 SELECT count(DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text))))) AS "fullScientificNames"
66
   FROM "Ecatalog_all"
67
  WHERE ("Ecatalog_all".genus IS NOT NULL);
66 68
COMMENT ON VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus IS '
67 69
Check: should return 1 row
68 70
';
69 71

  
70 72
CREATE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS
71
SELECT DISTINCT TRIM(CONCAT_WS(' ',
72
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))),
73
TRIM(IFNULL("scientificNameAuthorship",''))
74
)) AS "fullScientificName"
75
FROM "Ecatalog_all"
76
WHERE genus IS NOT NULL;
73
 SELECT DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text)))) AS "fullScientificName"
74
   FROM "Ecatalog_all"
75
  WHERE ("Ecatalog_all".genus IS NOT NULL);
77 76
COMMENT ON VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus IS '
78 77
Check: should return 45997 rows
79 78
';
80 79

  
81 80
CREATE VIEW _specimens_10_count_number_of_records_by_institution AS
82
SELECT specimen_duplicate_institutions, COUNT(*) AS records
83
FROM "Ecatalog_all"
84
GROUP BY specimen_duplicate_institutions;
81
 SELECT "Ecatalog_all".specimen_duplicate_institutions, 
82
    count(*) AS records
83
   FROM "Ecatalog_all"
84
  GROUP BY "Ecatalog_all".specimen_duplicate_institutions;
85 85
COMMENT ON VIEW _specimens_10_count_number_of_records_by_institution IS '
86 86
Note: Majority should be from ''NY''; these are standard herbarium acronyms
87 87

  
......
89 89
';
90 90

  
91 91
CREATE VIEW _specimens_11_list_of_three_standard_political_divisions AS
92
SELECT DISTINCT country, "stateProvince", county
93
FROM "Ecatalog_all";
92
 SELECT DISTINCT "Ecatalog_all".country, 
93
    "Ecatalog_all"."stateProvince", 
94
    "Ecatalog_all".county
95
   FROM "Ecatalog_all";
94 96
COMMENT ON VIEW _specimens_11_list_of_three_standard_political_divisions IS '
95 97
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN
96 98

  
......
98 100
';
99 101

  
100 102
CREATE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS
101
SELECT IFNULL("recordedBy",'') AS "collectorName", IFNULL("collectorNumber",'') AS "collectionNumber",
102
CONCAT_WS('-', "yearCollected", "monthCollected", "dayCollected") AS "dateCollected", COUNT(*) AS "specimenRecords"
103
FROM "Ecatalog_all"
104
GROUP BY "recordedBy", "collectorNumber", "dateCollected"
105
ORDER BY "recordedBy", "collectorNumber", "dateCollected";
103
 SELECT ifnull("Ecatalog_all"."recordedBy", ''::text) AS "collectorName", 
104
    ifnull("Ecatalog_all"."collectorNumber", ''::text) AS "collectionNumber", 
105
    concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") AS "dateCollected", 
106
    count(*) AS "specimenRecords"
107
   FROM "Ecatalog_all"
108
  GROUP BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected")
109
  ORDER BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected");
106 110
COMMENT ON VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count IS '
107 111
Check: should return 309396 rows
108 112
';
109 113

  
110 114
CREATE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS
111
SELECT
112
(
113
SELECT COUNT(*)
114
FROM "Ecatalog_all"
115
WHERE "decimalLatitude" IS NOT NULL
116
) AS "allLats",
117
(
118
SELECT COUNT(*)
119
FROM "Ecatalog_all"
120
WHERE "decimalLatitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
121
) AS "decimalLats",
122
(
123
SELECT COUNT(*)
124
FROM "Ecatalog_all"
125
WHERE "decimalLongitude" IS NOT NULL
126
) AS "allLongs",
127
(
128
SELECT COUNT(*)
129
FROM "Ecatalog_all"
130
WHERE "decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
131
) AS "decimalLongs";
115
 SELECT ( SELECT count(*) AS count
116
           FROM "Ecatalog_all"
117
          WHERE ("Ecatalog_all"."decimalLatitude" IS NOT NULL)) AS "allLats", 
118
    ( SELECT count(*) AS count
119
           FROM "Ecatalog_all"
120
          WHERE ("Ecatalog_all"."decimalLatitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLats", 
121
    ( SELECT count(*) AS count
122
           FROM "Ecatalog_all"
123
          WHERE ("Ecatalog_all"."decimalLongitude" IS NOT NULL)) AS "allLongs", 
124
    ( SELECT count(*) AS count
125
           FROM "Ecatalog_all"
126
          WHERE ("Ecatalog_all"."decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLongs";
132 127
COMMENT ON VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long IS '
133 128
Check: should return 1 row
134 129
';
135 130

  
136 131
CREATE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS
137
SELECT
138
(
139
SELECT COUNT(*)
140
FROM "Ecatalog_all"
141
WHERE "decimalLatitude"::double precision>90 OR "decimalLatitude"::double precision<-90
142
) AS "badLats",
143
(
144
SELECT COUNT(*)
145
FROM "Ecatalog_all"
146
WHERE "decimalLongitude"::double precision>180 OR "decimalLongitude"::double precision<-180
147
) AS "badLongs";
132
 SELECT ( SELECT count(*) AS count
133
           FROM "Ecatalog_all"
134
          WHERE ((("Ecatalog_all"."decimalLatitude")::double precision > (90)::double precision) OR (("Ecatalog_all"."decimalLatitude")::double precision < ((-90))::double precision))) AS "badLats", 
135
    ( SELECT count(*) AS count
136
           FROM "Ecatalog_all"
137
          WHERE ((("Ecatalog_all"."decimalLongitude")::double precision > (180)::double precision) OR (("Ecatalog_all"."decimalLongitude")::double precision < ((-180))::double precision))) AS "badLongs";
148 138
COMMENT ON VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long IS '
149 139
Check: should return 1 row
150 140
';
151 141

  
152 142
CREATE VIEW _specimens_15_list_distinct_locality_descriptions AS
153
SELECT DISTINCT locality__main AS "localityDescription"
154
FROM "Ecatalog_all"
155
WHERE locality__main IS NOT NULL;
143
 SELECT DISTINCT "Ecatalog_all".locality__main AS "localityDescription"
144
   FROM "Ecatalog_all"
145
  WHERE ("Ecatalog_all".locality__main IS NOT NULL);
156 146
COMMENT ON VIEW _specimens_15_list_distinct_locality_descriptions IS '
157 147
Check: should return 125592 records
158 148
';
159 149

  
160 150
CREATE VIEW _specimens_16_list_distinct_specimen_descriptions AS
161
SELECT "specimenDescription" AS "specimenDescription"
162
FROM "Ecatalog_all"
163
WHERE "specimenDescription" IS NOT NULL;
151
 SELECT "Ecatalog_all"."specimenDescription"
152
   FROM "Ecatalog_all"
153
  WHERE ("Ecatalog_all"."specimenDescription" IS NOT NULL);
164 154
COMMENT ON VIEW _specimens_16_list_distinct_specimen_descriptions IS '
165 155
Note: specimens descriptions in nybg extract is in column "specimenDescription"
166 156

  

Also available in: Unified diff