Revision 6442
Added by Aaron Marcuse-Kubitza about 12 years ago
inputs/.geoscrub/geoscrub_cleaned_unique/create.sql | ||
---|---|---|
-- Picks one geoscrub row for every distinct combination of verbatim country,
-- state/province, county/parish, latitude and longitude.
-- DISTINCT ON keeps the first row of each combination in ORDER BY order;
-- since "geoscrubID" is the last sort key, that is the row with the smallest
-- "geoscrubID" in the group.
SELECT DISTINCT ON (
    "countryVerbatim",
    "stateProvinceVerbatim",
    "countyParishVerbatim",
    "latitudeDecimalVerbatim",
    "longitudeDecimalVerbatim"
)
    *
FROM geoscrub
-- Skip empty coordinates, i.e. rows whose latitude and longitude are both 0.
-- Written as the De Morgan form of NOT (lat = 0 AND long = 0); the two forms
-- select the same rows, including when either value is NULL.
WHERE "latitudeDecimalVerbatim" <> 0
    OR "longitudeDecimalVerbatim" <> 0
ORDER BY
    "countryVerbatim",
    "stateProvinceVerbatim",
    "countyParishVerbatim",
    "latitudeDecimalVerbatim",
    "longitudeDecimalVerbatim",
    "geoscrubID"
;

-- :table is a psql variable supplied by the caller.
-- NOTE(review): presumably it names the table built from the query above -- confirm.
ALTER TABLE :table
    ADD PRIMARY KEY ("geoscrubID");
ALTER TABLE :table
    ADD UNIQUE ("sourceID");

-- Store the verbatim coordinates as double precision.
ALTER TABLE :table
    ALTER COLUMN "latitudeDecimalVerbatim" TYPE double precision;
ALTER TABLE :table
    ALTER COLUMN "longitudeDecimalVerbatim" TYPE double precision;
inputs/.geoscrub/geoscrub_cleaned_unique/header.csv | ||
---|---|---|
1 |
geoscrubID,sourceTable,sourceID,countryVerbatim,countryID,isInCountry,distErrCountry,stateProvinceVerbatim,stateProvinceID,isInStateProvince,distErrStateProvince,countyParishVerbatim,countyParishID,isInCountyParish,distErrCountyParish,elevationVerbatim,elevMaxVerbatim,elevMinVerbatim,latitudeDecimalVerbatim,longitudeDecimalVerbatim,isBadLatLong,badLatLongReason,latitudeDecimalCorrected,longitudeDecimalCorrected,latLongCorrectedRemarks,localityVerbatim,isCultivated,isCultivatedReason,countryStd,stateProvinceStd,isNewWorld,geoscrubID_idx_index,sourceTable_idx_index,sourceID_idx_index,Index_isCultivated_index,Index_isCultivatedReason_index,countryStd_index,stateProvinceStd_index,isNewWorld_index,countryID_index,stateProvinceID_index |
inputs/.geoscrub/geoscrub_cleaned_unique/map.csv | ||
---|---|---|
1 |
geoscrub,VegCore,Filter,Comments |
|
2 |
geoscrubID,OMIT,,"Using this would allow multiple occurrences of the same placename/coordinates combination, which would cause problems when datasources try to link up to the geoscrub places" |
|
3 |
sourceTable,OMIT,,Not geoscrub-related |
|
4 |
sourceID,OMIT,,Not needed because geoscrubID is provided |
|
5 |
countryVerbatim,country,, |
|
6 |
countryID,OMIT,,Not needed |
|
7 |
isInCountry,latLongInCountry,, |
|
8 |
distErrCountry,distanceToCountry_km,,"Assuming units based on the range of values and the circumference of the Earth [1]. |
|
9 |
|
|
10 |
[1] ""Circumference 40,075.017 km"" (http://en.wikipedia.org/wiki/Earth)" |
|
11 |
stateProvinceVerbatim,stateProvince,, |
|
12 |
stateProvinceID,OMIT,,Not needed |
|
13 |
isInStateProvince,latLongInStateProvince,, |
|
14 |
distErrStateProvince,distanceToStateProvince_km,,"Assuming units based on the range of values and the circumference of the Earth [1]. |
|
15 |
|
|
16 |
[1] ""Circumference 40,075.017 km"" (http://en.wikipedia.org/wiki/Earth)" |
|
17 |
countyParishVerbatim,OMIT,,Not geoscrubbed |
|
18 |
countyParishID,OMIT,,Not geoscrubbed |
|
19 |
isInCountyParish,UNUSED,, |
|
20 |
distErrCountyParish,UNUSED,, |
|
21 |
elevationVerbatim,OMIT,,Not geoscrub-related |
|
22 |
elevMaxVerbatim,OMIT,,Not geoscrub-related |
|
23 |
elevMinVerbatim,OMIT,,Not geoscrub-related |
|
24 |
latitudeDecimalVerbatim,decimalLatitude,, |
|
25 |
longitudeDecimalVerbatim,decimalLongitude,, |
|
26 |
isBadLatLong,latLongDomainInvalid,, |
|
27 |
badLatLongReason,*badLatLongReason,, |
|
28 |
latitudeDecimalCorrected,UNUSED,, |
|
29 |
longitudeDecimalCorrected,UNUSED,, |
|
30 |
latLongCorrectedRemarks,*latLongCorrectedRemarks,, |
|
31 |
localityVerbatim,OMIT,,Not geoscrubbed |
|
32 |
isCultivated,OMIT,,Not geoscrub-related |
|
33 |
isCultivatedReason,OMIT,,Not geoscrub-related |
|
34 |
countryStd,acceptedCountry,, |
|
35 |
stateProvinceStd,acceptedStateProvince,, |
|
36 |
isNewWorld,OMIT,,Not geoscrub-related |
|
37 |
geoscrubID_idx_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
38 |
sourceTable_idx_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
39 |
sourceID_idx_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
40 |
Index_isCultivated_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
41 |
Index_isCultivatedReason_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
42 |
countryStd_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
43 |
stateProvinceStd_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
44 |
isNewWorld_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
45 |
countryID_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
46 |
stateProvinceID_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
inputs/.geoscrub/geoscrub_cleaned_unique/test.xml.ref | ||
---|---|---|
1 |
Put template: |
|
2 |
<VegBIEN> |
|
3 |
<_simplifyPath id="-1"> |
|
4 |
<next>parent_id</next> |
|
5 |
<path> |
|
6 |
<location> |
|
7 |
<locationplace> |
|
8 |
<place_id> |
|
9 |
<place> |
|
10 |
<coordinates_id> |
|
11 |
<coordinates> |
|
12 |
<latitude_deg> |
|
13 |
<_nullIf> |
|
14 |
<null>0</null> |
|
15 |
<type>float</type> |
|
16 |
<value>$latitudeDecimalVerbatim</value> |
|
17 |
</_nullIf> |
|
18 |
</latitude_deg> |
|
19 |
<longitude_deg> |
|
20 |
<_nullIf> |
|
21 |
<null>0</null> |
|
22 |
<type>float</type> |
|
23 |
<value>$longitudeDecimalVerbatim</value> |
|
24 |
</_nullIf> |
|
25 |
</longitude_deg> |
|
26 |
</coordinates> |
|
27 |
</coordinates_id> |
|
28 |
<country>$countryVerbatim</country> |
|
29 |
<matched_place_id> |
|
30 |
<place> |
|
31 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
|
32 |
<coordinates_id> |
|
33 |
<coordinates> |
|
34 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
|
35 |
<latitude_deg> |
|
36 |
<_nullIf> |
|
37 |
<null>0</null> |
|
38 |
<type>float</type> |
|
39 |
<value>$latitudeDecimalVerbatim</value> |
|
40 |
</_nullIf> |
|
41 |
</latitude_deg> |
|
42 |
<longitude_deg> |
|
43 |
<_nullIf> |
|
44 |
<null>0</null> |
|
45 |
<type>float</type> |
|
46 |
<value>$longitudeDecimalVerbatim</value> |
|
47 |
</_nullIf> |
|
48 |
</longitude_deg> |
|
49 |
</coordinates> |
|
50 |
</coordinates_id> |
|
51 |
<matched_place_id>0</matched_place_id> |
|
52 |
<country>$countryStd</country> |
|
53 |
<stateprovince>$stateProvinceStd</stateprovince> |
|
54 |
<distance_to_country_m><_km_to_m><value>$distErrCountry</value></_km_to_m></distance_to_country_m> |
|
55 |
<distance_to_state_m><_km_to_m><value>$distErrStateProvince</value></_km_to_m></distance_to_state_m> |
|
56 |
<geovalid> |
|
57 |
<_and> |
|
58 |
<1>$isInCountry</1> |
|
59 |
<2>$isInStateProvince</2> |
|
60 |
</_and> |
|
61 |
</geovalid> |
|
62 |
</place> |
|
63 |
</matched_place_id> |
|
64 |
<stateprovince>$stateProvinceVerbatim</stateprovince> |
|
65 |
</place> |
|
66 |
</place_id> |
|
67 |
</locationplace> |
|
68 |
</location> |
|
69 |
</path> |
|
70 |
</_simplifyPath> |
|
71 |
</VegBIEN> |
|
72 |
Inserted 5 new rows into database |
inputs/.geoscrub/geoscrub_cleaned_unique/VegBIEN.csv | ||
---|---|---|
1 |
geoscrub,VegBIEN:/_simplifyPath:[next=parent_id]/path,Comments |
|
2 |
isInCountry,"/location/_if[@name=""if subplot""]/else/sourceaccessioncode/_if[@name=""if geovalidation""]/cond/_exists", |
|
3 |
latitudeDecimalVerbatim,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_nullIf:[null=0,type=float]/value", |
|
4 |
longitudeDecimalVerbatim,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_nullIf:[null=0,type=float]/value", |
|
5 |
countryVerbatim,/location/locationplace/*_id/place/country, |
|
6 |
latitudeDecimalVerbatim,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings" |
|
7 |
longitudeDecimalVerbatim,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings" |
|
8 |
countryStd,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub,matched_place_id=0]/country/_first/1", |
|
9 |
stateProvinceStd,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub,matched_place_id=0]/stateprovince/_first/1", |
|
10 |
countryVerbatim,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2, |
|
11 |
distErrCountry,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/distance_to_country_m/_km_to_m/value,"Assuming units based on the range of values and the circumference of the Earth [1]. |
|
12 |
|
|
13 |
[1] ""Circumference 40,075.017 km"" (http://en.wikipedia.org/wiki/Earth)" |
|
14 |
distErrStateProvince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/distance_to_state_m/_km_to_m/value,"Assuming units based on the range of values and the circumference of the Earth [1]. |
|
15 |
|
|
16 |
[1] ""Circumference 40,075.017 km"" (http://en.wikipedia.org/wiki/Earth)" |
|
17 |
isInCountry,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_first/2/_and/1, |
|
18 |
isInStateProvince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_first/2/_and/2, |
|
19 |
stateProvinceVerbatim,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2, |
|
20 |
isInCountry,"/location/locationplace/*_id/place/placecode/_if[@name=""if geovalidation""]/cond/_exists", |
|
21 |
stateProvinceVerbatim,/location/locationplace/*_id/place/stateprovince, |
|
22 |
Index_isCultivatedReason_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
23 |
Index_isCultivated_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
24 |
badLatLongReason,,** No join mapping for *badLatLongReason ** |
|
25 |
countryID,,** No non-empty join mapping for OMIT ** Not needed |
|
26 |
countryID_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
27 |
countryStd_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
28 |
countyParishID,,** No non-empty join mapping for OMIT ** Not geoscrubbed |
|
29 |
countyParishVerbatim,,** No non-empty join mapping for OMIT ** Not geoscrubbed |
|
30 |
distErrCountyParish,,** No non-empty join mapping for UNUSED ** |
|
31 |
elevMaxVerbatim,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
32 |
elevMinVerbatim,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
33 |
elevationVerbatim,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
34 |
geoscrubID,,"** No non-empty join mapping for OMIT ** Using this would allow multiple occurrences of the same placename/coordinates combination, which would cause problems when datasources try to link up to the geoscrub places" |
|
35 |
geoscrubID_idx_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
36 |
isBadLatLong,,** No join mapping for latLongDomainInvalid ** |
|
37 |
isCultivated,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
38 |
isCultivatedReason,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
39 |
isInCountyParish,,** No non-empty join mapping for UNUSED ** |
|
40 |
isNewWorld,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
41 |
isNewWorld_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
42 |
latLongCorrectedRemarks,,** No join mapping for *latLongCorrectedRemarks ** |
|
43 |
latitudeDecimalCorrected,,** No non-empty join mapping for UNUSED ** |
|
44 |
localityVerbatim,,** No non-empty join mapping for OMIT ** Not geoscrubbed |
|
45 |
longitudeDecimalCorrected,,** No non-empty join mapping for UNUSED ** |
|
46 |
sourceID,,** No non-empty join mapping for OMIT ** Not needed because geoscrubID is provided |
|
47 |
sourceID_idx_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
48 |
sourceTable,,** No non-empty join mapping for OMIT ** Not geoscrub-related |
|
49 |
sourceTable_idx_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
50 |
stateProvinceID,,** No non-empty join mapping for OMIT ** Not needed |
|
51 |
stateProvinceID_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
|
52 |
stateProvinceStd_index,,** No non-empty join mapping for OMIT ** Placeholder column created by MySQL to PostgreSQL translation |
inputs/.geoscrub/geoscrub_cleaned_unique/new_terms.csv | ||
---|---|---|
1 |
geoscrubID,OMIT,,"Using this would allow multiple occurrences of the same placename/coordinates combination, which would cause problems when datasources try to link up to the geoscrub places" |
|
2 |
sourceTable,OMIT,,Not geoscrub-related |
|
3 |
sourceID,OMIT,,Not needed because geoscrubID is provided |
|
4 |
countryVerbatim,country,, |
|
5 |
countryID,OMIT,,Not needed |
|
6 |
isInCountry,latLongInCountry,, |
|
7 |
distErrCountry,distanceToCountry_km,,"Assuming units based on the range of values and the circumference of the Earth [1]. |
|
8 |
|
|
9 |
[1] ""Circumference 40,075.017 km"" (http://en.wikipedia.org/wiki/Earth)" |
|
10 |
stateProvinceVerbatim,stateProvince,, |
|
11 |
stateProvinceID,OMIT,,Not needed |
|
12 |
isInStateProvince,latLongInStateProvince,, |
|
13 |
distErrStateProvince,distanceToStateProvince_km,,"Assuming units based on the range of values and the circumference of the Earth [1]. |
|
14 |
|
|
15 |
[1] ""Circumference 40,075.017 km"" (http://en.wikipedia.org/wiki/Earth)" |
|
16 |
countyParishVerbatim,OMIT,,Not geoscrubbed |
|
17 |
countyParishID,OMIT,,Not geoscrubbed |
|
18 |
isInCountyParish,UNUSED,, |
|
19 |
distErrCountyParish,UNUSED,, |
|
20 |
elevationVerbatim,OMIT,,Not geoscrub-related |
|
21 |
elevMaxVerbatim,OMIT,,Not geoscrub-related |
|
22 |
elevMinVerbatim,OMIT,,Not geoscrub-related |
|
23 |
latitudeDecimalVerbatim,decimalLatitude,, |
|
24 |
longitudeDecimalVerbatim,decimalLongitude,, |
|
25 |
isBadLatLong,latLongDomainInvalid,, |
|
26 |
latitudeDecimalCorrected,UNUSED,, |
|
27 |
longitudeDecimalCorrected,UNUSED,, |
|
28 |
localityVerbatim,OMIT,,Not geoscrubbed |
|
29 |
isCultivated,OMIT,,Not geoscrub-related |
|
30 |
isCultivatedReason,OMIT,,Not geoscrub-related |
|
31 |
countryStd,acceptedCountry,, |
|
32 |
stateProvinceStd,acceptedStateProvince,, |
|
33 |
geoscrubID_idx_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
34 |
sourceTable_idx_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
35 |
sourceID_idx_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
36 |
Index_isCultivated_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
37 |
Index_isCultivatedReason_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
38 |
countryStd_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
39 |
stateProvinceStd_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
40 |
isNewWorld_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
41 |
countryID_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
|
42 |
stateProvinceID_index,OMIT,,Placeholder column created by MySQL to PostgreSQL translation |
inputs/.geoscrub/geoscrub_cleaned_unique/unmapped_terms.csv | ||
---|---|---|
1 |
latLongDomainInvalid |
|
2 |
*badLatLongReason |
|
3 |
*latLongCorrectedRemarks |
inputs/.geoscrub/import_order.txt | ||
---|---|---|
1 |
geoscrub_cleaned_unique |
|
2 |
geoscrub_output |
Also available in: Unified diff
Removed the no-longer-used geoscrub_cleaned_unique input. Use geoscrub_output instead.