Revision 11863
Added by Aaron Marcuse-Kubitza almost 11 years ago
inputs/.NCBI/nodes/new_terms.csv | ||
---|---|---|
1 |
tax_id,taxonID,, |
|
2 |
parent tax_id,parentTaxonID,, |
|
3 |
rank,taxonRank,, |
|
4 |
name_txt,taxonNameOrEpithet,, |
inputs/.NCBI/import_order.txt | ||
---|---|---|
1 | 1 |
Source |
2 |
nodes.src |
|
3 |
names.src |
|
2 | 4 |
nodes |
inputs/.NCBI/nodes/create.sql | ||
---|---|---|
1 | 1 |
SELECT * |
2 | 2 |
FROM "nodes.src" |
3 |
JOIN "names.src" USING (tax_id)
|
|
4 |
WHERE "name class" = 'scientific name' |
|
3 |
JOIN "names.src" USING ("taxonID")
|
|
4 |
WHERE "*name class" = 'scientific name'
|
|
5 | 5 |
; |
6 | 6 |
|
7 |
ALTER TABLE :table ALTER COLUMN tax_id TYPE integer
|
|
8 |
USING tax_id::integer;
|
|
9 |
ALTER TABLE :table ALTER COLUMN "parent tax_id" TYPE integer
|
|
10 |
USING "parent tax_id"::integer;
|
|
7 |
ALTER TABLE :table ALTER COLUMN "taxonID" TYPE integer
|
|
8 |
USING "taxonID"::integer;
|
|
9 |
ALTER TABLE :table ALTER COLUMN "parentTaxonID" TYPE integer
|
|
10 |
USING "parentTaxonID"::integer;
|
|
11 | 11 |
|
12 |
ALTER TABLE :table ADD PRIMARY KEY (tax_id);
|
|
12 |
ALTER TABLE :table ADD PRIMARY KEY ("taxonID");
|
|
13 | 13 |
|
14 |
CREATE INDEX ON :table ("parent tax_id");
|
|
14 |
CREATE INDEX ON :table ("parentTaxonID");
|
|
15 | 15 |
|
16 |
ALTER TABLE :table ADD FOREIGN KEY ("parent tax_id") REFERENCES :table (tax_id)
|
|
16 |
ALTER TABLE :table ADD FOREIGN KEY ("parentTaxonID") REFERENCES :table ("taxonID")
|
|
17 | 17 |
ON UPDATE CASCADE ON DELETE CASCADE; |
18 | 18 |
|
19 |
-- Make name_txt (mostly) globally unique by removing other kingdoms
|
|
19 |
-- Make "taxonNameOrEpithet" (mostly) globally unique by removing other kingdoms
|
|
20 | 20 |
-- Note that the delete will cascade to descendants |
21 |
DELETE FROM :table WHERE tax_id IN (
|
|
21 |
DELETE FROM :table WHERE "taxonID" IN (
|
|
22 | 22 |
10239 -- superkingdom Viruses |
23 | 23 |
, 12884 -- superkingdom Viroids |
24 | 24 |
, 12908 -- unclassified sequences |
... | ... | |
29 | 29 |
, 33208 -- kingdom Animalia/Metazoa |
30 | 30 |
); |
31 | 31 |
|
32 |
-- Make name_txt completely globally unique by removing duplicates
|
|
32 |
-- Make "taxonNameOrEpithet" completely globally unique by removing duplicates
|
|
33 | 33 |
DELETE FROM :table |
34 |
WHERE name_txt IN (
|
|
35 |
SELECT name_txt
|
|
34 |
WHERE "taxonNameOrEpithet" IN (
|
|
35 |
SELECT "taxonNameOrEpithet"
|
|
36 | 36 |
FROM :table |
37 |
GROUP BY name_txt
|
|
37 |
GROUP BY "taxonNameOrEpithet"
|
|
38 | 38 |
HAVING count(*) > 1 |
39 | 39 |
) |
40 | 40 |
; |
inputs/.NCBI/nodes/header.csv | ||
---|---|---|
1 |
tax_id,nodes.src.row_num,parent tax_id,rank,embl code,4,5,6,7,8,9,names.src.row_num,name_txt,unique name,name class |
|
1 |
taxonID,*nodes.src.row_num,parentTaxonID,taxonRank,*embl code,*4,*5,*6,*7,*8,*9,*names.src.row_num,taxonNameOrEpithet,*unique name,*name class |
inputs/.NCBI/nodes/map.csv | ||
---|---|---|
1 | 1 |
NCBI,VegCore,Filter,Comments |
2 |
tax_id,taxonID,, |
|
3 |
nodes.src.row_num,*nodes.src.row_num,, |
|
4 |
parent tax_id,parentTaxonID,, |
|
5 |
rank,taxonRank,, |
|
6 |
embl code,*embl code,, |
|
7 |
4,*4,, |
|
8 |
5,*5,, |
|
9 |
6,*6,, |
|
10 |
7,*7,, |
|
11 |
8,*8,, |
|
12 |
9,*9,, |
|
13 |
names.src.row_num,*names.src.row_num,, |
|
14 |
name_txt,taxonNameOrEpithet,, |
|
15 |
unique name,*unique name,, |
|
16 |
name class,*name class,, |
|
2 |
taxonID,taxonID,, |
|
3 |
*nodes.src.row_num,*nodes.src.row_num,, |
|
4 |
parentTaxonID,parentTaxonID,, |
|
5 |
taxonRank,taxonRank,, |
|
6 |
*embl code,*embl code,, |
|
7 |
*4,*4,, |
|
8 |
*5,*5,, |
|
9 |
*6,*6,, |
|
10 |
*7,*7,, |
|
11 |
*8,*8,, |
|
12 |
*9,*9,, |
|
13 |
*names.src.row_num,*names.src.row_num,, |
|
14 |
taxonNameOrEpithet,taxonNameOrEpithet,, |
|
15 |
*unique name,*unique name,, |
|
16 |
*name class,*name class,, |
inputs/.NCBI/names.src/run | ||
---|---|---|
1 |
#!/bin/bash -e |
|
2 |
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run |
|
0 | 3 |
inputs/.NCBI/names.src/map.csv | ||
---|---|---|
1 |
NCBI,VegCore,Filter,Comments |
|
2 |
names.src.row_num,*names.src.row_num,, |
|
3 |
tax_id,taxonID,, |
|
4 |
name_txt,taxonNameOrEpithet,, |
|
5 |
unique name,*unique name,, |
|
6 |
name class,*name class,, |
inputs/.NCBI/names.src/new_terms.csv | ||
---|---|---|
1 |
tax_id,taxonID,, |
|
2 |
name_txt,taxonNameOrEpithet,, |
inputs/.NCBI/names.src/unmapped_terms.csv | ||
---|---|---|
1 |
*names.src.row_num |
|
2 |
*unique name |
|
3 |
*name class |
inputs/.NCBI/nodes.src/run | ||
---|---|---|
1 |
#!/bin/bash -e |
|
2 |
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run |
|
0 | 3 |
inputs/.NCBI/nodes.src/map.csv | ||
---|---|---|
1 |
NCBI,VegCore,Filter,Comments |
|
2 |
nodes.src.row_num,*nodes.src.row_num,, |
|
3 |
tax_id,taxonID,, |
|
4 |
parent tax_id,parentTaxonID,, |
|
5 |
rank,taxonRank,, |
|
6 |
embl code,*embl code,, |
|
7 |
4,*4,, |
|
8 |
5,*5,, |
|
9 |
6,*6,, |
|
10 |
7,*7,, |
|
11 |
8,*8,, |
|
12 |
9,*9,, |
inputs/.NCBI/nodes.src/new_terms.csv | ||
---|---|---|
1 |
tax_id,taxonID,, |
|
2 |
parent tax_id,parentTaxonID,, |
|
3 |
rank,taxonRank,, |
inputs/.NCBI/nodes.src/unmapped_terms.csv | ||
---|---|---|
1 |
*nodes.src.row_num |
|
2 |
*embl code |
|
3 |
*4 |
|
4 |
*5 |
|
5 |
*6 |
|
6 |
*7 |
|
7 |
*8 |
|
8 |
*9 |
Also available in: Unified diff
bugfix: inputs/.NCBI/import_order.txt: added nodes.src, names.src so that these would be installed under new-style import as well. this means that their columns will now be automapped, requiring the names to be renamed to VegCore names in nodes/create.sql.