Revision 5491
Added by Aaron Marcuse-Kubitza about 12 years ago
inputs/.NCBI/higher_taxa/create.sql | ||
---|---|---|
1 |
SELECT * |
|
2 |
FROM nodes |
|
3 |
JOIN names USING (tax_id) |
|
4 |
WHERE "name class" = 'scientific name' |
|
5 |
; |
|
6 |
|
|
7 |
ALTER TABLE :table ALTER COLUMN tax_id TYPE integer |
|
8 |
USING tax_id::integer; |
|
9 |
ALTER TABLE :table ALTER COLUMN "parent tax_id" TYPE integer |
|
10 |
USING "parent tax_id"::integer; |
|
11 |
|
|
12 |
ALTER TABLE :table ADD PRIMARY KEY (tax_id); |
inputs/.NCBI/higher_taxa/header.csv | ||
---|---|---|
1 |
tax_id,parent tax_id,rank,embl code,4,5,6,7,8,9,name_txt,unique name,name class |
inputs/.NCBI/higher_taxa/map.csv | ||
---|---|---|
1 |
NCBI,VegCore,Filter,Comments |
|
2 |
tax_id,taxonID,, |
|
3 |
parent tax_id,parentTaxonID,, |
|
4 |
rank,taxonRank,, |
|
5 |
embl code,*embl code,, |
|
6 |
4,*4,, |
|
7 |
5,*5,, |
|
8 |
6,*6,, |
|
9 |
7,*7,, |
|
10 |
8,*8,, |
|
11 |
9,*9,, |
|
12 |
name_txt,taxonName,, |
|
13 |
unique name,*unique name,, |
|
14 |
name class,*name class,, |
inputs/.NCBI/higher_taxa/VegBIEN.csv | ||
---|---|---|
1 |
NCBI,VegBIEN:/_simplifyPath:[next=parent_id]/path,Comments |
|
2 |
parent tax_id,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/_if[@name=""if has explicit parent""]/cond/_exists", |
|
3 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/_if[@name=""if has explicit parent""]/else/_if[@name=""if has verbatim name""]/{then/.,else/matched_concept_id/taxonconcept:[creator_id/party:[creator_id=0]/organizationname=TNRS]}/identifyingtaxonomicname/_alt/2/_join_words/1/_alt/2/_join_words/5/_join_words/1/_if[@name=""if has infraspecificEpithet""]/then", |
|
4 |
name_txt,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/_if[@name=""if has explicit parent""]/else/_if[@name=""if has verbatim name""]/{then/.,else/matched_concept_id/taxonconcept:[creator_id/party:[creator_id=0]/organizationname=TNRS]}/identifyingtaxonomicname/_alt/2/_join_words/1/_alt/2/_join_words/6", |
|
5 |
parent tax_id,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/parent_id/taxonconcept/_if[@name=""if has explicit parent""]/cond/_exists", |
|
6 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/parent_id/taxonconcept/_if[@name=""if has explicit parent""]/else/rank", |
|
7 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/parent_id/taxonconcept/_if[@name=""if has explicit parent""]/else/verbatimrank", |
|
8 |
parent tax_id,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/parent_id/taxonconcept/sourceaccessioncode, |
|
9 |
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/rank, |
|
10 |
tax_id,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/sourceaccessioncode, |
|
11 |
name_txt,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/taxonname, |
|
12 |
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonconcept/verbatimrank, |
|
13 |
4,,** No join mapping for *4 ** |
|
14 |
5,,** No join mapping for *5 ** |
|
15 |
6,,** No join mapping for *6 ** |
|
16 |
7,,** No join mapping for *7 ** |
|
17 |
8,,** No join mapping for *8 ** |
|
18 |
9,,** No join mapping for *9 ** |
|
19 |
embl code,,** No join mapping for *embl code ** |
|
20 |
name class,,** No join mapping for *name class ** |
|
21 |
unique name,,** No join mapping for *unique name ** |
inputs/.NCBI/higher_taxa/test.xml.ref | ||
---|---|---|
1 |
Put template: |
|
2 |
<VegBIEN> |
|
3 |
<_simplifyPath id="-1"> |
|
4 |
<next>parent_id</next> |
|
5 |
<path> |
|
6 |
<location> |
|
7 |
<locationevent> |
|
8 |
<taxonoccurrence> |
|
9 |
<taxondetermination> |
|
10 |
<taxonconcept_id> |
|
11 |
<taxonconcept> |
|
12 |
<parent_id><taxonconcept><sourceaccessioncode>$parent tax_id</sourceaccessioncode></taxonconcept></parent_id> |
|
13 |
<rank>$rank</rank> |
|
14 |
<sourceaccessioncode>$tax_id</sourceaccessioncode> |
|
15 |
<taxonname>$name_txt</taxonname> |
|
16 |
<verbatimrank>$rank</verbatimrank> |
|
17 |
</taxonconcept> |
|
18 |
</taxonconcept_id> |
|
19 |
</taxondetermination> |
|
20 |
</taxonoccurrence> |
|
21 |
</locationevent> |
|
22 |
</location> |
|
23 |
</path> |
|
24 |
</_simplifyPath> |
|
25 |
</VegBIEN> |
|
26 |
Inserted 7 new rows into database |
inputs/.NCBI/higher_taxa/new_terms.csv | ||
---|---|---|
1 |
tax_id,taxonID,, |
|
2 |
parent tax_id,parentTaxonID,, |
|
3 |
rank,taxonRank,, |
|
4 |
name_txt,taxonName,, |
inputs/.NCBI/higher_taxa/unmapped_terms.csv | ||
---|---|---|
1 |
*embl code |
|
2 |
*4 |
|
3 |
*5 |
|
4 |
*6 |
|
5 |
*7 |
|
6 |
*8 |
|
7 |
*9 |
|
8 |
*unique name |
|
9 |
*name class |
inputs/.NCBI/names/+header.dmp | ||
---|---|---|
1 |
tax_id | name_txt | unique name | name class | |
inputs/.NCBI/import_order.txt | ||
---|---|---|
1 |
higher_taxa |
inputs/.NCBI/_src/taxdmp.zip.url | ||
---|---|---|
1 |
ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip |
|
2 |
ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/ |
|
3 |
http://www.ncbi.nlm.nih.gov/guide/taxonomy/ |
inputs/.NCBI/_src/readme.txt | ||
---|---|---|
1 |
link taxdmp/readme.txt |
|
0 | 2 |
inputs/.NCBI/_src/taxdmp/readme.txt | ||
---|---|---|
1 |
*.dmp files are bcp-like dumps from the GenBank taxonomy database. |
|
2 |
|
|
3 |
General information. |
|
4 |
Field terminator is "\t|\t" |
|
5 |
Row terminator is "\t|\n" |
|
6 |
|
|
7 |
nodes.dmp file consists of taxonomy nodes. The description for each node includes the following |
|
8 |
fields: |
|
9 |
tax_id -- node id in GenBank taxonomy database |
|
10 |
parent tax_id -- parent node id in GenBank taxonomy database |
|
11 |
rank -- rank of this node (superkingdom, kingdom, ...) |
|
12 |
embl code -- locus-name prefix; not unique |
|
13 |
division id -- see division.dmp file |
|
14 |
inherited div flag (1 or 0) -- 1 if node inherits division from parent |
|
15 |
genetic code id -- see gencode.dmp file |
|
16 |
inherited GC flag (1 or 0) -- 1 if node inherits genetic code from parent |
|
17 |
mitochondrial genetic code id -- see gencode.dmp file |
|
18 |
inherited MGC flag (1 or 0) -- 1 if node inherits mitochondrial gencode from parent |
|
19 |
GenBank hidden flag (1 or 0) -- 1 if name is suppressed in GenBank entry lineage |
|
20 |
hidden subtree root flag (1 or 0) -- 1 if this subtree has no sequence data yet |
|
21 |
comments -- free-text comments and citations |
|
22 |
|
|
23 |
Taxonomy names file (names.dmp): |
|
24 |
tax_id -- the id of node associated with this name |
|
25 |
name_txt -- name itself |
|
26 |
unique name -- the unique variant of this name if the name is not unique |
|
27 |
name class -- (synonym, common name, ...) |
|
28 |
|
|
29 |
Divisions file (division.dmp): |
|
30 |
division id -- taxonomy database division id |
|
31 |
division cde -- GenBank division code (three characters) |
|
32 |
division name -- e.g. BCT, PLN, VRT, MAM, PRI... |
|
33 |
comments |
|
34 |
|
|
35 |
Genetic codes file: |
|
36 |
genetic code id -- GenBank genetic code id |
|
37 |
abbreviation -- genetic code name abbreviation |
|
38 |
name -- genetic code name |
|
39 |
cde -- translation table for this genetic code |
|
40 |
starts -- start codons for this genetic code |
|
41 |
|
|
42 |
Deleted nodes file (delnodes.dmp): |
|
43 |
tax_id -- deleted node id |
|
44 |
|
|
45 |
Merged nodes file (merged.dmp): |
|
46 |
old_tax_id -- id of the node which has been merged |
|
47 |
new_tax_id -- id of the node which is the result of merging |
|
48 |
|
|
49 |
Citations file (citations.dmp): |
|
50 |
cit_id -- the unique id of citation |
|
51 |
cit_key -- citation key |
|
52 |
pubmed_id -- unique id in PubMed database (0 if not in PubMed) |
|
53 |
medline_id -- unique id in MedLine database (0 if not in MedLine) |
|
54 |
url -- URL associated with citation |
|
55 |
text -- any text (usually article name and authors). |
|
56 |
-- The following characters are escaped in this text by a backslash: |
|
57 |
-- newline (appear as "\n"), |
|
58 |
-- tab character ("\t"), |
|
59 |
-- double quotes ('\"'), |
|
60 |
-- backslash character ("\\"). |
|
61 |
taxid_list -- list of node ids separated by a single space |
inputs/.NCBI/nodes/+header.dmp | ||
---|---|---|
1 |
tax_id | parent tax_id | rank | embl code | 4 | 5 | 6 | 7 | 8 | 9 | |
lib/forwarding.Makefile | ||
---|---|---|
1 | 1 |
##### Subdir forwarding |
2 | 2 |
|
3 |
subdirs := .TNRS/ $(wildcard */) |
|
3 |
subdirs := .NCBI/ .TNRS/ $(wildcard */)
|
|
4 | 4 |
allSubdirs := $(subdirs) .public/ |
5 | 5 |
|
6 | 6 |
define subdirTargets |
Also available in: Unified diff
Added inputs/.NCBI/. This uses many of the new schema and mappings features, such as taxonconcept.sourceaccessioncode and parentTaxonID