Project

General

Profile

« Previous | Next » 

Revision 4844

input.Makefile: Maps building: Use the new mappings/VegCore.csv as the VegCore vocabulary to canonicalize against, so that VegCore terms that are not yet mapped to VegBIEN are also canonicalized. As a result, several DwC terms have their case standardized according to http://rs.tdwg.org/dwc/terms/ (e.g. BasisOfRecord becomes basisOfRecord). Unmapped terms continue to be determined using mappings/VegCore-VegBIEN.csv, because a term should not be considered mapped until it has been mapped all the way through to VegBIEN.

View differences:

inputs/XAL/Specimen/map.csv
4 4
darwin:CollectionCode,collectionCode,,
5 5
darwin:CatalogNumber,catalogNumber,,
6 6
darwin:ScientificName,scientificName,,
7
darwin:BasisOfRecord,BasisOfRecord,,
7
darwin:BasisOfRecord,basisOfRecord,,
8 8
darwin:Kingdom,kingdom,,
9 9
darwin:Phylum,phylum,,
10 10
darwin:Class,class,,
......
18 18
darwin:YearIdentified,yearIdentified,,
19 19
darwin:MonthIdentified,monthIdentified,,
20 20
darwin:DayIdentified,dayIdentified,,
21
darwin:TypeStatus,TypeStatus,,
21
darwin:TypeStatus,typeStatus,,
22 22
darwin:CollectorNumber,recordNumber,,
23 23
darwin:FieldNumber,UNUSED,,"`grep -F ""<darwin:FieldNumber>"" inputs/XAL/src/digir.specimens.xml` returns no lines. This historical DwC term (http://rs.tdwg.org/dwc/terms/history/index.htm#fieldNumber-2009-04-24) has close to the same meaning as recordNumber (http://rs.tdwg.org/dwc/terms/#recordNumber)."
24 24
darwin:Collector,recordedBy,,
inputs/XAL/Specimen/VegBIEN.csv
44 44
darwin:Country,/location/locationplace/*_id/placepath/country,
45 45
darwin:County,/location/locationplace/*_id/placepath/county,
46 46
darwin:StateProvince,/location/locationplace/*_id/placepath/stateprovince,
47
darwin:BasisOfRecord,,** No join mapping for BasisOfRecord ** 
47
darwin:BasisOfRecord,,** No join mapping for basisOfRecord ** 
48 48
darwin:DateLastModified,,** No join mapping for dcterms:modified ** 
49 49
darwin:FieldNumber,,"** No join mapping for UNUSED ** `grep -F ""<darwin:FieldNumber>"" inputs/XAL/src/digir.specimens.xml` returns no lines. This historical DwC term (http://rs.tdwg.org/dwc/terms/history/index.htm#fieldNumber-2009-04-24) has close to the same meaning as recordNumber (http://rs.tdwg.org/dwc/terms/#recordNumber)."
50 50
darwin:JulianDay,,** No join mapping for day ** 
......
53 53
darwin:RelatedCatalogItem,,** No join mapping for relatedResourceID ** 
54 54
darwin:RelationshipType,,** No join mapping for relationshipOfResource ** 
55 55
darwin:TimeOfDay,,** No join mapping for eventTime ** 
56
darwin:TypeStatus,,** No join mapping for TypeStatus ** 
56
darwin:TypeStatus,,** No join mapping for typeStatus ** 
inputs/XAL/Specimen/unmapped_terms.csv
1 1
dcterms:modified
2
BasisOfRecord
3
TypeStatus
2
basisOfRecord
3
typeStatus
4 4
UNUSED
5 5
day
6 6
eventTime
inputs/NY/Specimen/map.csv
5 5
CollectionCode,collectionCode,,
6 6
CatalogNumber,catalogNumber,,
7 7
ScientificName,scientificName,,
8
BasisOfRecord,BasisOfRecord,,
8
BasisOfRecord,basisOfRecord,,
9 9
Kingdom,kingdom,,
10 10
Phylum,phylum,,
11 11
Class,class,,
......
19 19
YearIdentified,yearIdentified,,
20 20
MonthIdentified,monthIdentified,,
21 21
DayIdentified,dayIdentified,,
22
TypeStatus,TypeStatus,,
22
TypeStatus,typeStatus,,
23 23
CollectorNumber,UNUSED,,
24 24
FieldNumber,recordNumber,,Usage inconsistent with DwC definition
25 25
Collector,recordedBy,,
inputs/NY/Specimen/VegBIEN.csv
52 52
Country,/location/locationplace/*_id/placepath/country,
53 53
County,/location/locationplace/*_id/placepath/county,
54 54
StateProvince,/location/locationplace/*_id/placepath/stateprovince,
55
BasisOfRecord,,** No join mapping for BasisOfRecord ** 
55
BasisOfRecord,,** No join mapping for basisOfRecord ** 
56 56
CollectorNumber,,** No join mapping for UNUSED ** 
57 57
DateLastModified,,** No join mapping for dcterms:modified ** 
58 58
JulianDay,,** No join mapping for day ** 
......
61 61
RelatedCatalogItem,,** No join mapping for relatedResourceID ** 
62 62
RelationshipType,,** No join mapping for relationshipOfResource ** 
63 63
TimeOfDay,,** No join mapping for eventTime ** 
64
TypeStatus,,** No join mapping for TypeStatus ** 
64
TypeStatus,,** No join mapping for typeStatus ** 
65 65
key,,** No join mapping for OMIT ** UniqueNYInternalRecordNumber is a better pkey
inputs/NY/Specimen/unmapped_terms.csv
1 1
OMIT
2 2
dcterms:modified
3
BasisOfRecord
4
TypeStatus
3
basisOfRecord
4
typeStatus
5 5
UNUSED
6 6
day
7 7
eventTime
inputs/U/Specimen/map.csv
28 28
Alt,Alt,,What is this?
29 29
Alt2,Alt2,,What is this?
30 30
Ecology,habitat,,
31
Phenology,ReproductiveCondition,,
31
Phenology,reproductiveCondition,,
32 32
Notes,fieldNotes,,
inputs/U/Specimen/VegBIEN.csv
34 34
EW,,** No join mapping for EW ** 
35 35
Majorarea,,** No join mapping for Majorarea ** 
36 36
NS,,** No join mapping for NS ** 
37
Phenology,,** No join mapping for ReproductiveCondition ** 
37
Phenology,,** No join mapping for reproductiveCondition ** 
38 38
Prefix,,** No join mapping for UNUSED ** 
39 39
Suffix,,** No join mapping for UNUSED ** 
inputs/U/Specimen/unmapped_terms.csv
7 7
EW
8 8
Alt
9 9
Alt2
10
ReproductiveCondition
10
reproductiveCondition
inputs/ARIZ/Specimen/map.csv
1 1
ARIZ,VegCore,Filter,Comments
2 2
GlobalUniqueIdentifier,occurrenceID,,
3 3
DateLastModified,dcterms:modified,,
4
BasisOfRecord,BasisOfRecord,,
4
BasisOfRecord,basisOfRecord,,
5 5
InstitutionCode,institutionCode,,
6 6
CollectionCode,collectionCode,,
7 7
CatalogNumber,catalogNumber,/_alt/2,Don't overwrite CatalogNumberNumeric
......
18 18
VerbatimCollectingDate,dateCollected,/_alt/0,
19 19
FieldNotes,fieldNotes,,
20 20
JulianDay,day,,
21
HigherGeography,HigherGeography,,
21
HigherGeography,higherGeography,,
22 22
ContinentOcean,continent,,
23
IslandGroup,IslandGroup,,
24
Island,Island,,
23
IslandGroup,islandGroup,,
24
Island,island,,
25 25
Country,country,,
26 26
StateProvince,stateProvince,,
27 27
County,county,,
......
41 41
VerbatimElevation,UNUSED,,
42 42
MinimumDepthInMeters,minimumDepthInMeters,,
43 43
MaximumDepthInMeters,maximumDepthInMeters,,
44
VerbatimDepth,VerbatimDepth,,
44
VerbatimDepth,verbatimDepth,,
45 45
ScientificName,scientificName,/_alt/2,
46 46
Kingdom,kingdom,,
47 47
Phylum,phylum,,
......
57 57
YearIdentified,yearIdentified,,
58 58
MonthIdentified,monthIdentified,,
59 59
DayIdentified,dayIdentified,,
60
TypeStatus,TypeStatus,,
60
TypeStatus,typeStatus,,
61 61
Sex,sex,,
62
Preparations,Preparations,,
62
Preparations,preparations,,
63 63
Tissues,UNUSED,,
64 64
IndividualCount,individualCount,,
65 65
AgeClass,lifeStage,,
inputs/ARIZ/Specimen/VegBIEN.csv
58 58
County,/location/locationplace/*_id/placepath/county,
59 59
StateProvince,/location/locationplace/*_id/placepath/stateprovince,
60 60
AgeClass,,** No join mapping for lifeStage ** 
61
BasisOfRecord,,** No join mapping for BasisOfRecord ** 
61
BasisOfRecord,,** No join mapping for basisOfRecord ** 
62 62
DateLastModified,,** No join mapping for dcterms:modified ** 
63 63
FieldNumber,,"** No join mapping for OMIT ** Usage inconsistent with DwC definition: meaning is recordNumber. Identical to CollectorNumber [1], so does not need to be mapped.
64 64

  
65 65
[1] The following query returns no rows: SELECT * FROM ""ARIZ"".""Specimen"" WHERE ""CollectorNumber"" IS DISTINCT FROM ""FieldNumber"""
66 66
GenBankNum,,"** No join mapping for associatedSequences/_label[label=""GenBank""]/value ** "
67 67
GeorefMethod,,** No join mapping for georeferenceProtocol ** 
68
HigherGeography,,** No join mapping for HigherGeography ** 
68
HigherGeography,,** No join mapping for higherGeography ** 
69 69
HorizontalDatum,,** No join mapping for UNUSED ** 
70
Island,,** No join mapping for Island ** 
71
IslandGroup,,** No join mapping for IslandGroup ** 
70
Island,,** No join mapping for island ** 
71
IslandGroup,,** No join mapping for islandGroup ** 
72 72
JulianDay,,** No join mapping for day ** 
73 73
LatLongComments,,** No join mapping for georeferenceRemarks ** 
74 74
OriginalCoordinateSystem,,** No join mapping for verbatimSRS ** 
75 75
OtherCatalogNumbers,,** No join mapping for relatedResourceID/_alt/1 ** 
76
Preparations,,** No join mapping for Preparations ** 
76
Preparations,,** No join mapping for preparations ** 
77 77
RelatedCatalogedItems,,** No join mapping for relatedResourceID/_alt/2 ** Used only when OtherCatalogNumbers is NULL
78 78
TimeCollected,,** No join mapping for eventTime ** 
79 79
Tissues,,** No join mapping for UNUSED ** 
80
TypeStatus,,** No join mapping for TypeStatus ** 
81
VerbatimDepth,,** No join mapping for VerbatimDepth ** 
80
TypeStatus,,** No join mapping for typeStatus ** 
81
VerbatimDepth,,** No join mapping for verbatimDepth ** 
82 82
VerbatimElevation,,** No join mapping for UNUSED ** 
inputs/ARIZ/Specimen/new_terms.csv
9 9
Tissues
10 10
AgeClass
11 11
GenBankNum
12
OtherCatalogNumbers
13 12
RelatedCatalogedItems
inputs/ARIZ/Specimen/unmapped_terms.csv
1 1
dcterms:modified
2
BasisOfRecord
2
basisOfRecord
3 3
OMIT
4 4
eventTime
5 5
day
6
HigherGeography
7
IslandGroup
8
Island
6
higherGeography
7
islandGroup
8
island
9 9
UNUSED
10 10
verbatimSRS
11 11
georeferenceProtocol
12 12
georeferenceRemarks
13 13
UNUSED
14
VerbatimDepth
15
TypeStatus
16
Preparations
14
verbatimDepth
15
typeStatus
16
preparations
17 17
UNUSED
18 18
lifeStage
19 19
associatedSequences
inputs/SpeciesLink/Specimen/map.csv
1 1
SpeciesLink,VegCore,Filter,Comments
2 2
dwc_dwcore_GlobalUniqueIdentifier,occurrenceID,,
3 3
dwc_dwcore_DateLastModified,dcterms:modified,,
4
dwc_dwcore_BasisOfRecord,BasisOfRecord,,
4
dwc_dwcore_BasisOfRecord,basisOfRecord,,
5 5
dwc_dwcore_InstitutionCode,institutionCode,,
6 6
dwc_dwcore_CollectionCode,collectionCode,,
7 7
dwc_dwcore_CatalogNumber,catalogNumber,,
......
35 35
SELECT count(*) FROM ""SpeciesLink"".specimens
36 36
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
37 37
-----"
38
dwc_curatorial_TypeStatus,TypeStatus,,
38
dwc_curatorial_TypeStatus,typeStatus,,
39 39
dwc_curatorial_IndividualCount,individualCount,,
40 40
dwc_geospatial_DecimalLatitude,decimalLatitude,,
41 41
dwc_geospatial_DecimalLongitude,decimalLongitude,,
......
97 97
conceptual_darwin_2003_1_0_CollectionCode,collectionCode,,
98 98
conceptual_darwin_2003_1_0_CatalogNumber,catalogNumber,,
99 99
conceptual_darwin_2003_1_0_ScientificName,scientificName,,
100
conceptual_darwin_2003_1_0_BasisOfRecord,BasisOfRecord,,
100
conceptual_darwin_2003_1_0_BasisOfRecord,basisOfRecord,,
101 101
conceptual_darwin_2003_1_0_Kingdom,kingdom,,
102 102
conceptual_darwin_2003_1_0_Phylum,phylum,,
103 103
conceptual_darwin_2003_1_0_Class,class,,
......
111 111
conceptual_darwin_2003_1_0_YearIdentified,yearIdentified,,
112 112
conceptual_darwin_2003_1_0_MonthIdentified,monthIdentified,,
113 113
conceptual_darwin_2003_1_0_DayIdentified,dayIdentified,,
114
conceptual_darwin_2003_1_0_TypeStatus,TypeStatus,,
114
conceptual_darwin_2003_1_0_TypeStatus,typeStatus,,
115 115
conceptual_darwin_2003_1_0_CollectorNumber,recordNumber,/_alt/3,
116 116
conceptual_darwin_2003_1_0_FieldNumber,recordNumber,/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
117 117
-----
inputs/SpeciesLink/Specimen/VegBIEN.csv
161 161
conceptual_darwin_2003_1_0_StateProvince,/location/locationplace/*_id/placepath/stateprovince,
162 162
dwc_dwcore_StateProvince,/location/locationplace/*_id/placepath/stateprovince,
163 163
dwc_terms_stateProvince,/location/locationplace/*_id/placepath/stateprovince,
164
conceptual_darwin_2003_1_0_BasisOfRecord,,** No join mapping for BasisOfRecord ** 
164
conceptual_darwin_2003_1_0_BasisOfRecord,,** No join mapping for basisOfRecord ** 
165 165
conceptual_darwin_2003_1_0_DateLastModified,,** No join mapping for dcterms:modified ** 
166 166
conceptual_darwin_2003_1_0_JulianDay,,** No join mapping for day/_alt/1 ** 
167 167
conceptual_darwin_2003_1_0_PreparationType,,** No join mapping for preparations ** 
......
169 169
conceptual_darwin_2003_1_0_RelatedCatalogItem,,** No join mapping for relatedResourceID ** 
170 170
conceptual_darwin_2003_1_0_RelationshipType,,** No join mapping for relationshipOfResource ** 
171 171
conceptual_darwin_2003_1_0_TimeOfDay,,** No join mapping for eventTime ** 
172
conceptual_darwin_2003_1_0_TypeStatus,,** No join mapping for TypeStatus ** 
173
dwc_curatorial_TypeStatus,,** No join mapping for TypeStatus ** 
174
dwc_dwcore_BasisOfRecord,,** No join mapping for BasisOfRecord ** 
172
conceptual_darwin_2003_1_0_TypeStatus,,** No join mapping for typeStatus ** 
173
dwc_curatorial_TypeStatus,,** No join mapping for typeStatus ** 
174
dwc_dwcore_BasisOfRecord,,** No join mapping for basisOfRecord ** 
175 175
dwc_dwcore_DateLastModified,,** No join mapping for dcterms:modified ** 
176 176
dwc_dwcore_DayOfYear,,** No join mapping for day/_alt/2 ** 
177 177
dwc_terms_basisOfRecord,,** No join mapping for basisOfRecord ** 
inputs/SpeciesLink/Specimen/unmapped_terms.csv
1 1
dcterms:modified
2
BasisOfRecord
2
basisOfRecord
3 3
day
4
TypeStatus
4
typeStatus
5 5
dcterms:modified
6 6
basisOfRecord
7 7
typeStatus
......
15 15
month
16 16
day
17 17
dcterms:modified
18
BasisOfRecord
19
TypeStatus
18
basisOfRecord
19
typeStatus
20 20
day
21 21
eventTime
22 22
preparations
inputs/MO/Specimen/map.csv
4 4
InstitutionCode,institutionCode,,
5 5
CollectionCode,collectionCode,,
6 6
ScientificName,scientificName,,
7
BasisOfRecord,BasisOfRecord,,
7
BasisOfRecord,basisOfRecord,,
8 8
Kingdom,kingdom,,
9 9
family,family,,
10 10
genus,genus,,
......
12 12
Subspecies,subspecies,,
13 13
ScientificNameAuthor,scientificNameAuthorship,,
14 14
IdentifiedBy,identifiedBy,,
15
TypeStatus,TypeStatus,,
15
TypeStatus,typeStatus,,
16 16
CollectorNumber,recordNumber,,
17 17
Collector,recordedBy,,
18 18
DayCollected,dayCollected,,
inputs/MO/Specimen/VegBIEN.csv
35 35
Country,/location/locationplace/*_id/placepath/country,
36 36
County,/location/locationplace/*_id/placepath/county,
37 37
StateProvince,/location/locationplace/*_id/placepath/stateprovince,
38
BasisOfRecord,,** No join mapping for BasisOfRecord ** 
38
BasisOfRecord,,** No join mapping for basisOfRecord ** 
39 39
DateLastModified,,** No join mapping for dcterms:modified ** 
40
TypeStatus,,** No join mapping for TypeStatus ** 
40
TypeStatus,,** No join mapping for typeStatus ** 
inputs/input.Makefile
137 137
anyTest = $*/test.%
138 138
srcsOnly = $(filter-out $(anyMap) $(anyTest) %/logs,$(call dataOnly,$(1)))
139 139

  
140
vocab := $(mappings)/VegCore.csv
140 141
coreMap := $(mappings)/VegCore-VegBIEN.csv
141 142
dict := $(mappings)/Veg+-VegCore.csv
142 143

  
......
251 252

  
252 253
# Via maps cleanup
253 254
ifneq ($(filter %/.map.csv.last_cleanup,$(MAKECMDGOALS)),)
254
%/.map.csv.last_cleanup: %/map.csv $(coreMap) $(dict)
255
	$(bin)/in_place $< $(bin)/canon 1 $(coreMap)
255
%/.map.csv.last_cleanup: %/map.csv $(vocab) $(dict)
256
	$(bin)/in_place $< $(bin)/canon 1 $(vocab)
256 257
	$(bin)/in_place $< $(bin)/canon 1 $(dict)
257 258
	$(bin)/in_place $< $(bin)/translate 1 $(dict)
258 259
	touch $@
......
285 286
%/unmapped_terms.csv: %/map.csv $(coreMap)
286 287
	$(call newTerms,1,|$(bin)/filter_out_ci 0 $(coreMap))
287 288

  
288
%/new_terms.csv: %/map.csv $(coreMap) $(dict) %/unmapped_terms.csv
289
%/new_terms.csv: %/map.csv $(vocab) $(dict) %/unmapped_terms.csv
289 290
	$(call newTerms,0,$(rmNewTerms))
290
rmNewTerms = |$(bin)/filter_out_ci 0 $(coreMap)|$(bin)/filter_out_ci 0 $(dict)\
291
rmNewTerms = |$(bin)/filter_out_ci 0 $(vocab)|$(bin)/filter_out_ci 0 $(dict)\
291 292
$(if $(wildcard $(word 4,$+)),|$(bin)/filter_out_ci 0 $(word 4,$+))
292 293

  
293 294
termsSubdirs := $(tables)

Also available in: Unified diff