Project

General

Profile

1
#!/bin/bash -e
2
. "$(dirname "${BASH_SOURCE[0]}")"/../../../lib/runscripts/mysql.table.run
3

    
4
if self_not_included; then
5

    
6
full_table="${table%_plants}"
7

    
8
herbaria_filter.ih.csv_/make()
9
{
10
	begin_target; check_target_exists
11
	
12
	use_local
13
	limit= query='SELECT DISTINCT "specimenHolderInstitutions" FROM herbaria.ih ORDER BY "specimenHolderInstitutions"' \
14
to_target pg_export
15
}
16

    
17
ih_herbarium/make() # stores IH herbaria
18
{
19
	begin_target
20
	
21
	use_local_remote
22
	local table="$target_stem"
23
	if remaking; then mk_drop|mysql; fi
24
	
25
	herbaria_filter.ih.csv_/make
26
	(benign_error=1 mysql <<'EOF'
27
CREATE TABLE `ih_herbarium` (
28
 `institution_code` varchar(255) NOT NULL PRIMARY KEY
29
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='from IH.vegpath.org?&Index+Herbariorum'
30
EOF
31
) || { skip_table; return 0; }
32
	append=1 mysql_import <"$top_dir"/herbaria_filter.ih.csv_
33
}
34

    
35
plant_fraction/make() # runtime: 1 hr ("10603 rows affected (53 min 44.68 sec)")
36
{
37
	begin_target; mk_table_esc
38
	
39
	use_local_remote
40
	if remaking; then table=plant_fraction table_esc= mk_drop|mysql; fi
41
	(benign_error=1 mysql <<'EOF'
42
CREATE TABLE `plant_fraction` (
43
 `institution_code` varchar(255) NOT NULL PRIMARY KEY,
44
 `plant_fraction` float NOT NULL,
45
 KEY `plant_fraction` (`plant_fraction`)
46
) ENGINE=MyISAM DEFAULT CHARSET=utf8
47
EOF
48
) || { table=plant_fraction skip_table; return 0; }
49
	mysql <<EOF
50
INSERT INTO "plant_fraction"
51
SELECT
52
  institution_code
53
, COUNT(NULLIF(
54
	family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae')
55
	, false))/COUNT(*)
56
	AS plant_fraction
57
FROM ${limit:+(SELECT * FROM }$table_esc ${limit:+$(prefix= limit)) s}
58
GROUP BY institution_code
59
ORDER BY institution_code
60
EOF
61
}
62

    
63
plant_fraction/seal()
64
{
65
	begin_target
66
	
67
	use_local_remote
68
	table=plant_fraction table_esc= mysql_seal_table
69
}
70

    
71
plant_fraction_for_herbaria_filter/make()
72
{
73
	begin_target; mk_table_esc
74
	
75
	plant_fraction/make
76
	use_local_remote
77
	if remaking; then
78
		table=plant_fraction_for_herbaria_filter table_esc= mk_drop|mysql
79
	fi
80
	(benign_error=1 mysql <<'EOF'
81
CREATE VIEW `plant_fraction_for_herbaria_filter` AS
82
SELECT * FROM `plant_fraction`
83
WHERE plant_fraction >= 0.8
84
EOF
85
) || { table=plant_fraction_for_herbaria_filter skip_table; return 0; }
86
}
87

    
88
herbaria_filter/make() # runtime: ~0 ("0m2.235s"): CSV import, index scans only
89
{
90
	begin_target
91
	
92
	use_local_remote
93
	local table="$target_stem"
94
	if remaking; then mk_drop|mysql; fi
95
	
96
	ih_herbarium/make
97
	plant_fraction_for_herbaria_filter/make
98
	
99
	(benign_error=1 mysql <<'EOF'
100
CREATE TABLE `herbaria_filter` (
101
 `institution_code` varchar(255) NOT NULL PRIMARY KEY
102
) ENGINE=MyISAM DEFAULT CHARSET=utf8
103
EOF
104
) || { skip_table; return 0; }
105
	mysql <<EOF
106
INSERT IGNORE INTO $table
107
SELECT institution_code FROM ih_herbarium
108
;
109
INSERT IGNORE INTO $table
110
SELECT institution_code FROM plant_fraction_for_herbaria_filter
111
;
112
EOF
113
}
114

    
115
herbaria_filter/seal()
116
{
117
	begin_target
118
	
119
	use_local_remote
120
	table=herbaria_filter table_esc= mysql_seal_table
121
}
122

    
123
table/make() # makes the filter view
124
{
125
	begin_target; mk_table_esc
126
	
127
	herbaria_filter/make
128
	use_local_remote
129
	if remaking; then mk_drop|mysql; fi
130
	(benign_error=1 mysql <<EOF
131
CREATE VIEW $table AS
132
SELECT * FROM $full_table
133
WHERE deleted IS NULL
134
AND institution_code IN (SELECT * FROM herbaria_filter)
135
AND (
136
	    family IS NULL
137
	OR  (family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae'))
138
)
139
EOF
140
) || { skip_table; return 0; }
141
}
142

    
143
# table.tsv.gz/upload() runtime: 15 min ("2184253533 100% 2.30MB/s 0:15:03")
144

    
145
# table.tsv.gz/make() runtime: 35 min ("34m55.802s") w/o upload
146

    
147
func_override table.tsv/make__src_table_run
148
table.tsv/make() # usage: [remake=1] [limit=...] .../run table.tsv/make
149
# note that incremental output is provided right away with --quick (unbuffered),
150
# but takes awhile to become visible in Macfusion sshfs
151
#
152
# to view runtime when using `screen`:
153
# press Ctrl-A [ , use arrow keys/Ctrl-B/Ctrl-F, and then Esc to leave copy mode
154
{
155
	begin_target; check_target_exists
156
	
157
	table/make
158
	with_rm table.tsv/make__src_table_run
159
}
160

    
161
# import() runtime: 5 h ("294m10.322s") @starscream w/o table.tsv.gz/make()
162
# - table cleanup runtime: 1.5 h ("1:29:35.949391 sec") @starscream
163

    
164
fi
(8-8/11)