Project

General

Profile

1 8291 aaronmk
#!/bin/bash -e
2 8287 aaronmk
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run
3 8303 aaronmk
4 8705 aaronmk
if self_not_included; then
5
6 9771 aaronmk
full_table="${table%_plants}"
7
8 9461 aaronmk
herbaria_filter.ih.csv_/make()
9 9434 aaronmk
{
10
	echo_func; set_make_vars; check_target_exists
11
12
	use_local
13 10269 aaronmk
	limit= query='SELECT DISTINCT "specimenHolderInstitutions" FROM herbaria.ih ORDER BY "specimenHolderInstitutions"' \
14 9434 aaronmk
to_target pg_export
15
}
16
17 9779 aaronmk
ih_herbarium/make() # stores IH herbaria
18
{
19
	echo_func; set_make_vars
20
21
	use_local_remote
22
	local table="$target_stem"
23
	if remaking; then mk_drop|mysql; fi
24
25
	herbaria_filter.ih.csv_/make
26
	(benign_error=1 mysql <<'EOF'
27
CREATE TABLE `ih_herbarium` (
28
 `institution_code` varchar(255) NOT NULL PRIMARY KEY
29
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='from IH.vegpath.org?&Index+Herbariorum'
30
EOF
31
) || skip_table || return 0
32
	append=1 mysql_import <"$top_dir"/herbaria_filter.ih.csv_
33
}
34
35 9768 aaronmk
plant_fraction/make() # runtime: 1 hr ("10603 rows affected (53 min 44.68 sec)")
36 9483 aaronmk
{
37
	echo_func; set_make_vars; mk_table_esc
38
39
	use_local_remote
40 9777 aaronmk
	if remaking; then table=plant_fraction table_esc= mk_drop|mysql; fi
41 9626 aaronmk
	(benign_error=1 mysql <<'EOF'
42
CREATE TABLE `plant_fraction` (
43 9592 aaronmk
 `institution_code` varchar(255) NOT NULL PRIMARY KEY,
44 9629 aaronmk
 `plant_fraction` float NOT NULL,
45
 KEY `plant_fraction` (`plant_fraction`)
46 9628 aaronmk
) ENGINE=MyISAM DEFAULT CHARSET=utf8
47 9592 aaronmk
EOF
48 9626 aaronmk
) || table=plant_fraction skip_table || return 0
49 9777 aaronmk
	mysql <<EOF
50 9592 aaronmk
INSERT INTO "plant_fraction"
51 9483 aaronmk
SELECT
52
  institution_code
53 9766 aaronmk
, COUNT(NULLIF(
54
	family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae')
55
	, false))/COUNT(*)
56 9483 aaronmk
	AS plant_fraction
57
FROM ${limit:+(SELECT * FROM }$table_esc ${limit:+$(prefix= limit)) s}
58
GROUP BY institution_code
59
ORDER BY institution_code
60
EOF
61
}
62
63 9637 aaronmk
plant_fraction/seal()
64 9633 aaronmk
{
65
	echo_func; set_make_vars
66
67
	use_local_remote
68
	table=plant_fraction table_esc= mysql_seal_table
69
}
70
71 9639 aaronmk
plant_fraction_for_herbaria_filter/make()
72
{
73
	echo_func; set_make_vars; mk_table_esc
74
75 9705 aaronmk
	plant_fraction/make
76 9639 aaronmk
	use_local_remote
77
	if remaking; then
78 9777 aaronmk
		table=plant_fraction_for_herbaria_filter table_esc= mk_drop|mysql
79 9639 aaronmk
	fi
80
	(benign_error=1 mysql <<'EOF'
81
CREATE VIEW `plant_fraction_for_herbaria_filter` AS
82
SELECT * FROM `plant_fraction`
83
WHERE plant_fraction >= 0.8
84
EOF
85
) || table=plant_fraction_for_herbaria_filter skip_table || return 0
86
}
87
88 9770 aaronmk
herbaria_filter/make() # runtime: ~0 ("0m2.235s"): CSV import, index scans only
89 9433 aaronmk
{
90
	echo_func; set_make_vars
91
92 9659 aaronmk
	use_local_remote
93
	local table="$target_stem"
94 9777 aaronmk
	if remaking; then mk_drop|mysql; fi
95 9659 aaronmk
96 9780 aaronmk
	ih_herbarium/make
97 9661 aaronmk
	plant_fraction_for_herbaria_filter/make
98 9434 aaronmk
99 9643 aaronmk
	(benign_error=1 mysql <<'EOF'
100
CREATE TABLE `herbaria_filter` (
101 9663 aaronmk
 `institution_code` varchar(255) NOT NULL PRIMARY KEY
102 9780 aaronmk
) ENGINE=MyISAM DEFAULT CHARSET=utf8
103 9433 aaronmk
EOF
104 9643 aaronmk
) || skip_table || return 0
105 9777 aaronmk
	mysql <<EOF
106 9780 aaronmk
INSERT IGNORE INTO $table
107
SELECT institution_code FROM ih_herbarium
108
;
109
INSERT IGNORE INTO $table
110 9661 aaronmk
SELECT institution_code FROM plant_fraction_for_herbaria_filter
111 9780 aaronmk
;
112 9661 aaronmk
EOF
113 9433 aaronmk
}
114
115 9665 aaronmk
herbaria_filter/seal()
116
{
117
	echo_func; set_make_vars
118
119
	use_local_remote
120
	table=herbaria_filter table_esc= mysql_seal_table
121
}
122
123 9772 aaronmk
table/make() # makes the filter view
124
{
125
	echo_func; set_make_vars; mk_table_esc
126
127 9774 aaronmk
	herbaria_filter/make
128 9772 aaronmk
	use_local_remote
129 9777 aaronmk
	if remaking; then mk_drop|mysql; fi
130 9772 aaronmk
	(benign_error=1 mysql <<EOF
131
CREATE VIEW $table AS
132
SELECT * FROM $full_table
133
WHERE deleted IS NULL
134
AND institution_code IN (SELECT * FROM herbaria_filter)
135 9778 aaronmk
AND (
136
	    family IS NULL
137
	OR  (family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae'))
138
)
139 9772 aaronmk
EOF
140 9773 aaronmk
) || skip_table || return 0
141 9772 aaronmk
}
142
143 9821 aaronmk
# table.tsv.gz/upload() runtime: 15 min ("2184253533 100% 2.30MB/s 0:15:03")
144
145 9822 aaronmk
# table.tsv.gz/make() runtime: 35 min ("34m55.802s") w/o upload
146 9755 aaronmk
147 9155 aaronmk
func_override table.tsv/make__src_table_run
148 9348 aaronmk
table.tsv/make() # usage: [remake=1] [limit=...] .../run table.tsv/make
149 9776 aaronmk
# note that incremental output is provided right away with --quick (unbuffered),
150
# but takes awhile to become visible in Macfusion sshfs
151 9781 aaronmk
#
152
# to view runtime when using `screen`:
153 9819 aaronmk
# press Ctrl-A [ , use arrow keys/Ctrl-B/Ctrl-F, and then Esc to leave copy mode
154 8303 aaronmk
{
155 9676 aaronmk
	echo_func; set_make_vars; check_target_exists
156 9357 aaronmk
157 9775 aaronmk
	table/make
158
	self_make table.tsv/make__src_table_run
159 8303 aaronmk
}
160 8705 aaronmk
161 11869 aaronmk
# import() runtime: 5 h ("294m10.322s") @starscream w/o table.tsv.gz/make()
162
# - table cleanup runtime: 1.5 h ("1:29:35.949391 sec") @starscream
163 9823 aaronmk
164 8705 aaronmk
fi