Revision 4648
Added by Aaron Marcuse-Kubitza over 12 years ago
inputs/CTFS/PlotObservation/unmapped_terms.csv | ||
---|---|---|
1 |
Description |
inputs/FIA/Organism/unmapped_terms.csv | ||
---|---|---|
1 |
HOM |
inputs/VegBank/observation_/unmapped_terms.csv | ||
---|---|---|
1 |
previousobs_id |
|
2 | 1 |
dateaccuracy |
3 | 2 |
covermethod_id |
4 | 3 |
coverdispersion |
bin/filter_out_ci | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python |
2 | 2 |
# Finds spreadsheet rows where a column is not in a vocabulary. |
3 |
# The vocabulary should not have a header. CSVs without a header are supported. |
|
3 | 4 |
# Case- and punctuation-insensitive. |
4 | 5 |
|
5 | 6 |
import csv |
... | ... | |
18 | 19 |
vocab = set() |
19 | 20 |
stream = open(vocab_path, 'rb') |
20 | 21 |
reader = csv.reader(stream) |
21 |
reader.next() # skip header |
|
22 | 22 |
for row in reader: vocab.add(simplify(row[0])) |
23 | 23 |
stream.close() |
24 | 24 |
|
25 | 25 |
# Filter input |
26 | 26 |
reader = csv.reader(sys.stdin) |
27 | 27 |
writer = csv.writer(sys.stdout) |
28 |
writer.writerow(reader.next()) # pass through header |
|
29 | 28 |
for row in reader: |
30 | 29 |
term = simplify(row[col_num]) |
31 | 30 |
if term not in vocab: writer.writerow(row) |
inputs/XAL/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
dcterms:modified |
|
2 | 1 |
BasisOfRecord |
3 | 2 |
TypeStatus |
4 | 3 |
recordNumber/_alt/1 |
inputs/TEAM/VL/unmapped_terms.csv | ||
---|---|---|
1 |
max_diam |
|
2 | 1 |
max_diam_pom |
3 | 2 |
ConditionCodes |
4 | 3 |
LocationCodes |
inputs/TEAM/VT/unmapped_terms.csv | ||
---|---|---|
1 |
dbh_pom |
|
2 | 1 |
dbh_new |
3 | 2 |
dbh_new_pom |
4 | 3 |
ConditionCodes |
inputs/NY/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
key |
|
2 | 1 |
dcterms:modified |
3 | 2 |
BasisOfRecord |
4 | 3 |
TypeStatus |
inputs/Madidi/Organism/unmapped_terms.csv | ||
---|---|---|
1 |
Number of inventory (general) |
|
2 | 1 |
Number of inventory (expedition) |
3 | 2 |
"Type Inventory (C, PP, TP)" |
4 | 3 |
Similar specimen |
inputs/Madidi/Plot/unmapped_terms.csv | ||
---|---|---|
1 |
Inventory number |
|
2 | 1 |
Number of inventory (expedition) |
3 | 2 |
Installed and evaluated by |
4 | 3 |
Collaborators |
inputs/UNCC/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
taxonRank/_alt/1 |
|
2 | 1 |
taxonRank/_alt/2 |
3 | 2 |
Collector/_merge/1 |
4 | 3 |
Collector/_merge/2 |
inputs/SALVIAS-CSV/Organism/unmapped_terms.csv | ||
---|---|---|
1 |
ind_id |
|
2 | 1 |
tag/_alt/1 |
3 | 2 |
previousTag/_alt/1 |
4 | 3 |
catalogNumber/_alt/2 |
inputs/SALVIAS-CSV/Plot/unmapped_terms.csv | ||
---|---|---|
1 |
"clay/_units:[default=""%"",to=]/value" |
|
2 | 1 |
"silt/_units:[default=""%"",to=]/value" |
3 | 2 |
"sand/_units:[default=""%"",to=]/value" |
4 | 3 |
"organic/_units:[default=""%"",to=]/value" |
inputs/CVS/Organism/unmapped_terms.csv | ||
---|---|---|
1 | 1 |
OMIT |
2 |
OMIT |
|
3 | 2 |
confidentialityStatus |
inputs/REMIB/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
long_min |
|
2 | 1 |
long_sec |
3 | 2 |
lat_min |
4 | 3 |
lat_sec |
inputs/CTFS/StemObservation/unmapped_terms.csv | ||
---|---|---|
1 |
FamilyID |
|
2 | 1 |
GenusID |
3 | 2 |
SpeciesID |
4 | 3 |
SubSpeciesID |
inputs/CTFS/Subplot/unmapped_terms.csv | ||
---|---|---|
1 |
OMIT |
|
2 | 1 |
DimX |
3 | 2 |
DimY |
inputs/CTFS/TaxonOccurrence/unmapped_terms.csv | ||
---|---|---|
1 |
FamilyID |
|
2 | 1 |
GenusID |
3 | 2 |
SpeciesID |
4 | 3 |
SubSpeciesID |
inputs/CTFS/Plot/unmapped_terms.csv | ||
---|---|---|
1 |
CountryID |
|
2 | 1 |
ReferenceX |
3 | 2 |
ReferenceY |
4 | 3 |
row_num |
inputs/SALVIAS/plotMetadata/unmapped_terms.csv | ||
---|---|---|
1 |
orig_filename |
|
2 | 1 |
AccessCode |
3 | 2 |
PrimOwnerID |
4 | 3 |
SiteName |
inputs/SALVIAS/projects/unmapped_terms.csv | ||
---|---|---|
1 |
project_pi |
|
2 | 1 |
allow_download_all |
3 | 2 |
ipr_specific |
4 | 3 |
ipr_specific_updated |
inputs/SALVIAS/stems/unmapped_terms.csv | ||
---|---|---|
1 |
origrecord_id_stems |
|
2 | 1 |
tmp_del |
3 | 2 |
plotobs_id_index |
inputs/SALVIAS/plotObservations/unmapped_terms.csv | ||
---|---|---|
1 |
OrigRecordID |
|
2 | 1 |
ind_id |
3 | 2 |
dist |
4 | 3 |
perp_dist |
inputs/U/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
Collector/_merge/1 |
|
2 | 1 |
Prefix |
3 | 2 |
Suffix |
4 | 3 |
Collector/_merge/2 |
inputs/ARIZ/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
dcterms:modified |
|
2 | 1 |
BasisOfRecord |
3 | 2 |
CatalogNumber/_alt/2 |
4 | 3 |
CatalogNumber/_alt/1 |
inputs/GBIF/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
0 |
|
2 | 1 |
1 |
3 | 2 |
3 |
4 | 3 |
dcterms:modified |
inputs/NCU-NCSC/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
"Latitude/_replace:[""\?$""=]/value" |
|
2 | 1 |
"Longitude/_replace:[""\?$""=]/value" |
3 | 2 |
FIPS |
4 | 3 |
Alt N/S |
inputs/SpeciesLink/Specimen/unmapped_terms.csv | ||
---|---|---|
1 |
dcterms:modified |
|
2 | 1 |
BasisOfRecord |
3 | 2 |
ScientificNameAuthor/_alt/3 |
4 | 3 |
JulianDay/_alt/2 |
inputs/VegBank/taxonobservation_/unmapped_terms.csv | ||
---|---|---|
1 |
plantconcept_id |
|
2 | 1 |
taxonobservation_reference_id |
3 | 2 |
taxoninferencearea |
4 | 3 |
emb_taxonobservation |
inputs/VegBank/plot_/unmapped_terms.csv | ||
---|---|---|
1 |
reference_id |
|
2 | 1 |
parentPlotID |
3 | 2 |
OMIT |
4 | 3 |
OMIT |
Also available in: Unified diff
filter_out_ci: Filter the header row like any other row instead of passing it through, in order to properly support CSVs without a header, such as the unmapped_terms.csv and new_terms.csv files. For CSVs with a header, remove the vocabulary's header before passing the vocabulary to filter_out_ci, because the vocabulary's first row is no longer skipped.