Project

General

Profile

« Previous | Next » 

Revision 4497

join: Match terms case-insensitively

View differences:

inputs/SALVIAS-CSV/Organism/VegBIEN.csv
20 20
GROUP BY ""PLOT_ID"", subplot
21 21
HAVING count(DISTINCT census_date) > 1
22 22
-----"
23
census_date,/location/locationevent/obsstartdate/_*/date/_dateRangeStart/value,"This is for the subplot, not the organism, as all organisms in a subplot have the same value for it. The following query returns no rows:
24
-----
25
SELECT ""PLOT_ID"", subplot, count(DISTINCT census_date) AS census_date_count
26
FROM ""SALVIAS-CSV"".organisms
27
WHERE subplot IS NOT NULL AND census_date IS NOT NULL
28
GROUP BY ""PLOT_ID"", subplot
29
HAVING count(DISTINCT census_date) > 1
30
-----"
23 31
no_of_individuals,/location/locationevent/taxonoccurrence/aggregateoccurrence/count,"Brad: This is a count of number of indiiduals for an *aggregate* observation. For VegBank, I'm not sure. Not exactly the same as stemCount. An individual tree could have 3 stems but would still only count as 1. We need to check with Bob on this."
24 32
cover_percent,/location/locationevent/taxonoccurrence/aggregateoccurrence/cover,
25 33
census_no,"/location/locationevent/taxonoccurrence/aggregateoccurrence/definedvalue[*_id/userdefined[tablename=aggregateoccurrence,userdefinedname=censusNo]]:[@fkey=tablerecord_id]/definedvalue","Brad: Assigned by SALVIAS. 1 for first plot, then 2, 3, etc. I can't recall if we even have repeat censuses in SALVIAS. Probably not."
inputs/SALVIAS-CSV/Organism/test.xml.ref
9 9
        </parent_id>
10 10
        <authorlocationcode>$subplot</authorlocationcode>
11 11
        <locationevent>
12
            <obsstartdate><_date><date><_dateRangeStart><value>$census_date</value></_dateRangeStart></date></_date></obsstartdate>
12
            <obsstartdate>
13
                <_date>
14
                    <date>
15
                        <_dateRangeStart>
16
                            <value>
17
                                <_alt>
18
                                    <0>$census_date</0>
19
                                    <1>$census_date</1>
20
                                </_alt>
21
                            </value>
22
                        </_dateRangeStart>
23
                    </date>
24
                </_date>
25
            </obsstartdate>
13 26
            <taxonoccurrence>
14 27
                <aggregateoccurrence>
15 28
                    <count>$no_of_individuals</count>
inputs/SALVIAS/plotObservations/VegBIEN.csv
20 20
GROUP BY ""PlotID"", ""Line""
21 21
HAVING count(DISTINCT census_date) > 1
22 22
-----"
23
census_date,/location/locationevent/obsenddate/_*/date/_dateRangeEnd/value/_alt/1,"This is for the subplot, not the organism, as all organisms in a subplot have the same value for it. The following query returns no rows:
24
-----
25
SELECT ""PlotID"", ""Line"", count(DISTINCT census_date) AS census_date_count
26
FROM ""SALVIAS"".organisms
27
WHERE ""Line"" IS NOT NULL AND census_date IS NOT NULL
28
GROUP BY ""PlotID"", ""Line""
29
HAVING count(DISTINCT census_date) > 1
30
-----"
23 31
census_date,/location/locationevent/obsstartdate/_*/date/_dateRangeStart/value/_alt/1,"This is for the subplot, not the organism, as all organisms in a subplot have the same value for it. The following query returns no rows:
24 32
-----
25 33
SELECT ""PlotID"", ""Line"", count(DISTINCT census_date) AS census_date_count
......
28 36
GROUP BY ""PlotID"", ""Line""
29 37
HAVING count(DISTINCT census_date) > 1
30 38
-----"
39
census_date,/location/locationevent/obsstartdate/_*/date/_dateRangeStart/value/_alt/1,"This is for the subplot, not the organism, as all organisms in a subplot have the same value for it. The following query returns no rows:
40
-----
41
SELECT ""PlotID"", ""Line"", count(DISTINCT census_date) AS census_date_count
42
FROM ""SALVIAS"".organisms
43
WHERE ""Line"" IS NOT NULL AND census_date IS NOT NULL
44
GROUP BY ""PlotID"", ""Line""
45
HAVING count(DISTINCT census_date) > 1
46
-----"
31 47
NoInd,/location/locationevent/taxonoccurrence/aggregateoccurrence/count,"Brad: This is a count of number of indiiduals for an *aggregate* observation. For VegBank, I'm not sure. Not exactly the same as stemCount. An individual tree could have 3 stems but would still only count as 1. We need to check with Bob on this."
32 48
cover_percent,/location/locationevent/taxonoccurrence/aggregateoccurrence/cover,
33 49
census_no,"/location/locationevent/taxonoccurrence/aggregateoccurrence/definedvalue[*_id/userdefined[tablename=aggregateoccurrence,userdefinedname=censusNo]]:[@fkey=tablerecord_id]/definedvalue","Brad: Assigned by SALVIAS. 1 for first plot, then 2, 3, etc. I can't recall if we even have repeat censuses in SALVIAS. Probably not."
inputs/SALVIAS/plotObservations/test.xml.ref
9 9
        </parent_id>
10 10
        <authorlocationcode>$Line</authorlocationcode>
11 11
        <locationevent>
12
            <obsenddate><_date><date><_dateRangeEnd><value>$census_date</value></_dateRangeEnd></date></_date></obsenddate>
13
            <obsstartdate><_date><date><_dateRangeStart><value>$census_date</value></_dateRangeStart></date></_date></obsstartdate>
12
            <obsenddate>
13
                <_date>
14
                    <date>
15
                        <_dateRangeEnd>
16
                            <value>
17
                                <_alt>
18
                                    <0>$census_date</0>
19
                                    <1>$census_date</1>
20
                                </_alt>
21
                            </value>
22
                        </_dateRangeEnd>
23
                    </date>
24
                </_date>
25
            </obsenddate>
26
            <obsstartdate>
27
                <_date>
28
                    <date>
29
                        <_dateRangeStart>
30
                            <value>
31
                                <_alt>
32
                                    <0>$census_date</0>
33
                                    <1>$census_date</1>
34
                                </_alt>
35
                            </value>
36
                        </_dateRangeStart>
37
                    </date>
38
                </_date>
39
            </obsstartdate>
14 40
            <taxonoccurrence>
15 41
                <aggregateoccurrence>
16 42
                    <count>$NoInd</count>
inputs/SpeciesLink/Specimen/VegBIEN.csv
63 63
SELECT count(*) FROM ""SpeciesLink"".specimens
64 64
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
65 65
-----"
66
FieldNumber,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/collectionnumber/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
67
-----
68
SELECT count(*) FROM ""SpeciesLink"".specimens
69
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
70
-----"
66 71
fieldNumber,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/collectionnumber/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
67 72
-----
68 73
SELECT count(*) FROM ""SpeciesLink"".specimens
69 74
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
70 75
-----"
76
fieldNumber,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/collectionnumber/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
77
-----
78
SELECT count(*) FROM ""SpeciesLink"".specimens
79
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
80
-----"
71 81
Sex,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/definedvalue[*_id/userdefined[tablename=plantobservation,userdefinedname=sex]]:[@fkey=tablerecord_id]/definedvalue","Brad: OMIT. Apparently not used by this institution and of uncertain applicability to plants. With other collections, *sometimes* used for phenological information (if specimen in flower, in fruit, sterile, etc.). If so, this is important information we should keep. Bob, Nick, anywhere for phenological information in VB and VX?; Aaron: Keeping it because it's useful for some collections"
72 82
sex,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/definedvalue[*_id/userdefined[tablename=plantobservation,userdefinedname=sex]]:[@fkey=tablerecord_id]/definedvalue","Brad: OMIT. Apparently not used by this institution and of uncertain applicability to plants. With other collections, *sometimes* used for phenological information (if specimen in flower, in fruit, sterile, etc.). If so, this is important information we should keep. Bob, Nick, anywhere for phenological information in VB and VX?; Aaron: Keeping it because it's useful for some collections"
73 83
CatalogNumber,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else",
......
85 95
SELECT count(*) FROM ""SpeciesLink"".specimens
86 96
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
87 97
-----"
98
FieldNumber,/location/locationevent/taxonoccurrence/authortaxoncode/_alt/1/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
99
-----
100
SELECT count(*) FROM ""SpeciesLink"".specimens
101
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
102
-----"
88 103
fieldNumber,/location/locationevent/taxonoccurrence/authortaxoncode/_alt/1/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
89 104
-----
90 105
SELECT count(*) FROM ""SpeciesLink"".specimens
91 106
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
92 107
-----"
108
fieldNumber,/location/locationevent/taxonoccurrence/authortaxoncode/_alt/1/_alt/2,"Usage inconsistent with DwC definition. This field is instead identical to recordNumber, as the following query returns zero: (takes 23 sec)
109
-----
110
SELECT count(*) FROM ""SpeciesLink"".specimens
111
WHERE COALESCE(""dwc_curatorial_FieldNumber"", ""dwc_terms_fieldNumber"", ""conceptual_darwin_2003_1_0_FieldNumber"") != ""dwc_terms_recordNumber""
112
-----"
93 113
ScientificName,/location/locationevent/taxonoccurrence/authortaxoncode/_alt/2/_alt/2,
94 114
scientificName,/location/locationevent/taxonoccurrence/authortaxoncode/_alt/2/_alt/2,
95 115
IdentifiedBy,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/party/givenname/_namePart/first,
inputs/SpeciesLink/Specimen/test.xml.ref
393 393
                                    <_alt>
394 394
                                        <0>$dwc_curatorial_FieldNumber</0>
395 395
                                        <1>$conceptual_darwin_2003_1_0_FieldNumber</1>
396
                                        <2>$dwc_terms_fieldNumber</2>
396
                                        <2>$dwc_curatorial_FieldNumber</2>
397
                                        <3>$conceptual_darwin_2003_1_0_FieldNumber</3>
398
                                        <4>$dwc_terms_fieldNumber</4>
399
                                        <5>$dwc_terms_fieldNumber</5>
397 400
                                    </_alt>
398 401
                                </2>
399 402
                            </_alt>
......
463 466
                                    <_alt>
464 467
                                        <0>$dwc_curatorial_FieldNumber</0>
465 468
                                        <1>$conceptual_darwin_2003_1_0_FieldNumber</1>
466
                                        <2>$dwc_terms_fieldNumber</2>
469
                                        <2>$dwc_curatorial_FieldNumber</2>
470
                                        <3>$conceptual_darwin_2003_1_0_FieldNumber</3>
471
                                        <4>$dwc_terms_fieldNumber</4>
472
                                        <5>$dwc_terms_fieldNumber</5>
467 473
                                    </_alt>
468 474
                                </2>
469 475
                            </_alt>
bin/join
36 36
    map_1_cols = reader.next()
37 37
    for row in reader:
38 38
        if map_1_core_only: row = row[:2]
39
        if row[0] != '': map_1.setdefault(row[0], []).append(row)
39
        if row[0] != '': map_1.setdefault(row[0].lower(), []).append(row)
40 40
    stream.close()
41 41
    
42 42
    # Join map 1 to map 0
......
63 63
            suffix = ''
64 64
            while True:
65 65
                try:
66
                    out_rows = map_1[row[1]]
66
                    out_rows = map_1[row[1].lower()]
67 67
                    break
68 68
                except KeyError:
69 69
                    # Heuristically look for a match on a parent path.

Also available in: Unified diff