Revision 14817
Added by Aaron Marcuse-Kubitza about 10 years ago
input.Makefile | ||
---|---|---|
1 |
selfDir_uZPPqC := $(dir $(lastword $(MAKEFILE_LIST))) |
|
2 |
root := $(selfDir_uZPPqC).. |
|
3 |
include $(root)/lib/common.Makefile |
|
4 |
|
|
5 |
|
|
6 |
##### Configuration |
|
7 |
|
|
8 |
# Command line |
|
9 |
# Non-empty: resume an import at the last partition row found in its log
# (passed to the mapper as start=...)
continue ?=
# Non-empty: sql/install runs the SQL exports through psql_verbose_vegbien
# instead of psql_script_vegbien
debug ?=
# Non-empty: import errors are ignored, and nothing is imported when a
# _no_import marker applies to the datasource
full_import ?=
# Empty: the Source table is left out of $(importTables)
import_source ?= 1
# Non-empty: %/uninstall drops a VIEW instead of a TABLE
is_view ?=
# Append import/validate output to the log file; on unless $(test) is set
log ?= $(if $(test),,1)
# Non-empty: install output is not written to the install log
noclobber ?=
# Non-empty: profile the import (stats only when combined with $(test))
profile ?=
# Non-empty: install output goes only to the install log rather than also
# to the terminal via tee
quiet ?=
# Empty: verify/%.out files are not regenerated
reverify ?= 1
# Non-empty: only the *schema*.sql exports are installed
schema_only ?=
# Follows by_col unless set explicitly
use_staged ?= $(by_col)

# Makefile
# Extensions of files that count as input data
exts ?= csv tsv tab txt dat dmp
# Value passed as n= to test runs
test_n ?= 2
|
25 |
|
|
26 |
##### Vars/functions |
|
27 |
|
|
28 |
# Paths |
|
29 |
# Datasource name: the current directory's name without a leading "."
datasrc := $(notdir $(realpath .))
datasrc := $(datasrc:.%=%)
# Shared helper scripts and mapping spreadsheets
bin := $(root)/bin
mappings := $(root)/mappings
|
32 |
|
|
33 |
# Make |
|
34 |
# Recipes use bash-only features (pipefail, $'...' strings, /dev/fd paths)
SHELL := /bin/bash
# Re-invokes this makefile; the relative path assumes make is run from a
# directory directly below the one holding input.Makefile
selfMake = $(MAKE) --makefile=../input.Makefile
# Builds the current target from $(root), with the $(root)/ prefix removed
subMake = $(MAKE) "$(patsubst $(root)/%,%,$@)" --directory=$(root)
# All prerequisites except those starting with "_" (such as _always)
+_ = $(filter-out _%,$+)
# Filters the output of embedded $(shell make ...) invocations
filter_make := grep -vF -e lib -e ".Makefile'."
# $(call addBeforeExt,<infix>,<file>): inserts <infix> before <file>'s extension
addBeforeExt = $(basename $(2))$(1)$(suffix $(2))
|
41 |
|
|
42 |
# Terminal |
|
43 |
# Terminal width, used by $(diffVerbose) for --width=.
# tput prints nothing on stdout when it has no usable terminal description
# (e.g. TERM unset in a non-interactive run), which would leave --width= without
# a value; fall back to 80 columns and keep tput's complaint off stderr.
termCols := $(shell tput cols 2>/dev/null || echo 80)
|
44 |
|
|
45 |
# Commands |
|
46 |
# mkdir that creates missing parents and tolerates existing directories
MKDIR = mkdir -p
# Creates the directory that holds the current target
mkdir = $(MKDIR) $(@D)
# Unified diff with two lines of context
diff = diff --unified=2
diffIgnoreSpace = $(diff) --ignore-space-change
# Side-by-side diff as wide as the terminal when $(verbose) is set, else $(diff)
diffVerbose = $(if $(verbose),diff --side-by-side --left-column --width=$(termCols),$(diff))
|
52 |
|
|
53 |
# BIEN commands |
|
54 |
# $(call sortFilenames,<names>): runs the names through bin/sort_filenames
sortFilenames = $(shell $(bin)/sort_filenames $(1))
selfMap = $(bin)/cols 0 0
# psql wrapper, and the same wrapper run with no_search_path=1 in its environment
psqlAsBien := $(bin)/psql_verbose_vegbien
psqlNoSearchPath := env no_search_path=1 $(psqlAsBien)
# Emits the statement that makes the datasource's schema the search_path.
# Usage: ($(inDatasrc); cat $(file))|$(psqlCmd)
inDatasrc := echo 'SET search_path TO "$(datasrc)";'
|
60 |
|
|
61 |
# SVN |
|
62 |
# $(call setSvnIgnore,<dir>,<patterns>): sets <dir>'s svn:ignore property
setSvnIgnore = svn propset svn:ignore $(2) $(1)
# $(call addDirWithIgnore,<dir>,<patterns>): $(addDir) followed by
# $(setSvnIgnore), both seeing the same arguments. Expands to two recipe lines.
# NOTE(review): addDir is not defined in this file; presumably it comes from
# lib/common.Makefile
define addDirWithIgnore
$(addDir)
$(setSvnIgnore)
endef
|
67 |
|
|
68 |
##### Environment |
|
69 |
|
|
70 |
export PATH := $(bin):$(PATH) |
|
71 |
|
|
72 |
##### General targets |
|
73 |
|
|
74 |
# Default goal: build every map
all: _always maps ;

# Removes everything that has been collected in the $(all) variable
clean: _always
	$(RM) $(all)

# Cleans, then rebuilds in a fresh make process so that make's cache of
# existing files is reset
remake: _always clean
	+$(selfMake)
|
82 |
|
|
83 |
# Runs the prerequisite script to produce the target, but only when the target
# does not exist yet. This avoids unintentional remaking when the script was
# just checked out from svn (mod time = now) while the output is synced
# externally. The prerequisite has to stay, since it decides when the rule
# applies.
make_script = $(if $(wildcard $@),,"time" ./$< >$@)

# A table's directory is considered built once its via map is
%/: %/map.csv _always ;

%/: % _always ;

# Generic "<file>.make generates <file>" rule
%: %.make
	$(make_script)
.PRECIOUS: % # save partial outputs of aborted src make scripts
|
96 |
|
|
97 |
##### Tables discovery |
|
98 |
|
|
99 |
# File listing the tables in import order, and the marker file that excludes
# a datasource or table from import
sortFile := import_order.txt
noImportFile := _no_import

# $(call dontImport,<table>): non-empty when the marker exists in the current
# dir or in <table>, or when <table> is Source and $(import_source) is empty
dontImport = $(wildcard $(noImportFile))$(wildcard $(1)/$(noImportFile))$(if $(import_source),,$(filter Source,$(1)))

# Keep $(sortFile) up-to-date. Done once per make tree: the exported flag
# stops sub-makes from repeating it.
ifeq ($(sort_file_updated),)
$(shell sort_file_updated=1 $(selfMake) $(sortFile)|$(filter_make) >&2)
export sort_file_updated=1
endif

# $(shell) turns the file's newlines into spaces
sort_file_tables := $(if $(wildcard $(sortFile)),$(shell cat $(sortFile)))
tables := $(sort_file_tables)
allSubdirs := $(call wildcard/,*/)
# Every subdir except _*, verify and logs
allTables := $(call sortFilenames,$(filter-out _% verify logs,$(patsubst %/,%,$(allSubdirs))))
# Tables missing from the sort file are treated as joined tables and moved to
# the beginning
joinedTables := $(filter-out $(tables),$(allTables))
allTables := $(strip $(joinedTables) $(tables))
has_visible_files = $(call wildcard/,$(1)/*)
# Keep only the tables whose dir has visible files
allTables := $(foreach t,$(allTables),$(if $(call has_visible_files,$(t)),$(t)))
# Nothing listed in the sort file: use every table
ifeq ($(tables),)
tables := $(allTables)
endif
importTables := $(foreach t,$(tables),$(if $(call dontImport,$(t)),,$(t)))
|
125 |
|
|
126 |
# Prints one table per line. Use `make -s` to avoid echoing commands.
list_tables: _always
	@for t in $(tables); do echo "$$t"; done

# Rewrites $(sortFile) from list_tables whenever $(tables) contains an entry
# that the file lacks. $(tables) is normally read from the file itself, so in
# practice this fills in a missing or empty file.
$(sortFile): _always
	$(if $(filter-out $(sort_file_tables),$(tables)),$(selfMake) -s list_tables >$@)
|
132 |
|
|
133 |
##### SVN |
|
134 |
|
|
135 |
# Brace globs for the files to put under version control. The first
# assignment covers per-table files and is embedded into the second.
svnFilesGlob := */{,{,.}{data,map,VegBIEN}.csv{,.*},*header.*,*.sql,test.xml.ref}
svnFilesGlob := {map.csv,*{grants,validations,~}*.sql,{,*/}{*.{log,make},*terms.csv},$(svnFilesGlob)}
# Files that are added even though they live in or next to _* dirs
_svnFilesGlob := {_MySQL/{,*.make},{,*/}{$(noImportFile),*{run,Makefile},*.{md5,url},*README.TXT}}
# Expanded lazily so the globs are evaluated when add_files runs; _*, logs/*
# and *.data.sql are dropped from the first glob's matches only
svnFiles = $(filter-out _% logs/% %.data.sql,$(call wildcard/,$(svnFilesGlob))) $(call wildcard/,$(_svnFilesGlob))
|
140 |
|
|
141 |
# Puts the datasource under version control, unless a _no_import marker
# applies to it
add: _always $(if $(call dontImport,.),,add!) ;

# Same as add, but unconditional.
# To update all inputs with these settings: make inputs/add
add!: _always Source/add $(allTables:%=%/add)
	$(call setSvnIgnore,.,'*')
	$(call addDirWithIgnore,logs,$$'*.gz\n*.log.sql\n*.trace')
	$(call addDirWithIgnore,verify,$$'*.csv\n*.log\n*.out\n*.tsv\n*.txt\n*.xls\n*.xlsx')
	$(call addFile,$(sortFile))
	$(if $(wildcard _MySQL/),$(call addDirWithIgnore,_MySQL,'*'))
	$(if $(wildcard _src/),$(call addDirWithIgnore,_src,'*'))
	$(if $(wildcard _archive/),$(call addDirWithIgnore,_archive,'*'))
# Run add_files in a separate make so its wildcard cache is fresh when
# svnFiles is expanded. The + marks the line as a recursive make invocation
# (the $(MAKE) reference is hidden inside selfMake), matching the other
# recursive calls in this file.
	+$(selfMake) add_files

add_files: _always
	$(call add*,$(svnFiles))

# Adds a new table subdir together with its logs dir
%/add: _always
	$(call addDirWithIgnore,$*,'*')
	$(call addDirWithIgnore,$*/logs,$$'*.gz\n*.log.sql\n*.trace')
|
162 |
|
|
163 |
##### Existing maps discovery |
|
164 |
|
|
165 |
# Patterns matching the map files found in a table dir
anyMap := %/map.csv %/VegBIEN.csv %/unmapped_terms.csv %/new_terms.csv

# NOTE(review): ci is not defined in this file; presumably it expands the
# extensions to their case variants. Confirm in lib/common.Makefile.
exts := $(call ci,$(exts))
extsFilter := $(addprefix %.,$(exts))
# $(call dataOnly,<files>): only the files with a data extension
dataOnly = $(filter $(extsFilter),$(1))

anyTest = $*/test.%
# $(call srcsOnly,<files>): data files that are neither maps, test files nor logs
srcsOnly = $(filter-out $(anyMap) $(anyTest) %/logs,$(call dataOnly,$(1)))

srcDict := map.csv

vocab := $(mappings)/VegCore.vocab.csv
thesaurus := $(mappings)/VegCore.thesaurus.csv
coreMap := $(mappings)/VegCore-VegBIEN.csv

viaMaps := $(tables:%=%/map.csv)

# Replace only the trailing /map.csv. A plain $(subst map.,VegBIEN.,...) would
# also rewrite "map." occurring inside a table's name.
autogenMaps := $(viaMaps:%/map.csv=%/VegBIEN.csv)
# Autogenerated maps first, then any other VegBIEN.csv found on disk
directMaps := $(autogenMaps) $(filter-out $(autogenMaps),$(wildcard */VegBIEN.csv))
|
185 |
|
|
186 |
##### Sources |
|
187 |
|
|
188 |
# Source data files of the table matched by the rule's stem, in sorted order
srcs = $(call sortFilenames,$(call srcsOnly,$(wildcard $*/*)))
nonHeaderSrcs = $(filter-out %/header.csv %/header.txt,$(srcs))
# Non-empty when the subdir has no data of its own, so it references an
# already-installed staging table
isRef = $(if $(nonHeaderSrcs),,1)
# Command that concatenates the table's sources, and its with_* counterpart
catSrcs = $(bin)/cat_csv $(nonHeaderSrcs)
withCatSrcs = $(patsubst $(bin)/%,$(bin)/with_%,$(catSrcs)) --

# Use `make -s` to avoid echoing commands
%/list_srcs: _always
	echo $(nonHeaderSrcs)

# Run with `make -s` to avoid echoing make commands
cat: $(importTables:%=%/cat) _always ;

%/cat: _always
	$(catSrcs)
|
203 |
|
|
204 |
##### Input data retrieval |
|
205 |
|
|
206 |
# Generated by a script.
# Must come before `%.sql: _MySQL/%.sql` to override it
%.sql: %.sql.make
	$(make_script)

# Data-only MySQL export, translated by bin/my2pg.data unless the target
# already exists. The export must be created with:
# `--compatible=postgresql --add-locks=false --set-charset --no-create-info`
# Must come before `%.sql: _MySQL/%.sql` to override it
%.data.sql: _MySQL/%.data.sql
	$(if $(wildcard $@),,$(bin)/my2pg.data <$< >$@)

# Any other MySQL export, translated by bin/my2pg unless the target already
# exists. The export must be created with:
# `--compatible=postgresql --add-locks=false --set-charset`
# Add `--no-data` to create a schema-only export.
%.sql: _MySQL/%.sql
	$(if $(wildcard $@),,$(bin)/my2pg <$< >$@)
|
221 |
|
|
222 |
##### Staging tables installation |
|
223 |
|
|
224 |
srcTable := %.src

# $(call dbExportsWildcard,<glob>): files matching <glob>, plus the outputs of
# the matching _MySQL/<glob>.make scripts, sorted and without duplicates
dbExportsWildcard = $(sort $(patsubst _MySQL/%.make,%,$(wildcard $(1) _MySQL/$(1).make)))

# Schemas first
dbExports := $(call dbExportsWildcard,*schema*.sql)
# Then the remaining .sql files, unless only the schema was requested
ifeq ($(schema_only),)
dbExports := $(dbExports) $(filter-out $(dbExports),$(call dbExportsWildcard,*.sql))
endif
dbExports := $(filter-out grants.sql,$(dbExports))
dbExports := $(strip $(dbExports))
# The pseudo-table "sql" stands for the SQL exports
allInstalls := $(if $(dbExports),sql) $(allTables)

# "t" when local.run's pg_schema_exists succeeds for the datasource's schema
datasrc_schema_exists = $(shell schema=$(datasrc) $(root)/lib/runscripts/local.run pg_schema_exists && echo t)
|
239 |
|
|
240 |
# Uses ./run when the datasource has one. Otherwise, for dirs containing
# table.run (only run this for datasource dirs), falls back to the old-style
# install unless the schema already exists. Validations are installed in
# either case.
install: _always
	$(if $(wildcard ./run),./run install,$(if $(wildcard table.run),$(if $(datasrc_schema_exists),,+$(selfMake) install_oldstyle)))
	+$(selfMake) validate/install

install_oldstyle: _always schema $(allInstalls:%=%/install) ;

# rm_schema will also drop all staging tables
uninstall: _always confirm_rm_schema rm_schema ;

reinstall: _always uninstall install ;

# Asks for confirmation before dropping the TNRS schema
confirm_rm_schema: _always
	$(if $(filter TNRS,$(datasrc)),$(call confirm,WARNING: This will delete the TNRS cache!,To save it: make backups/TNRS.backup-remake))

# The leading - ignores the error raised when the schema already exists
schema: _always
	-echo 'CREATE SCHEMA "$(datasrc)";'|$(psqlNoSearchPath)

rm_schema: _always
	echo 'DROP SCHEMA IF EXISTS "$(datasrc)" CASCADE;'|$(psqlNoSearchPath)
|
263 |
|
|
264 |
installLog := logs/install.log.sql

# $(1) is a dir prefix ("" or "<table>/") in all of the helpers below
log_dir = $(1)logs
# Non-empty when the logs dir exists and $(noclobber) is not set
has_log_dir = $(call and,$(wildcard $(log_dir)),$(call not,$(noclobber)))
# $(call logInstall,<prefix>,<redirect>,<tee flags>): shell suffix that sends
# stdout and stderr to the install log. With $(quiet) it redirects using
# <redirect>; otherwise it pipes through tee with <tee flags>. Empty when
# there is no usable log dir.
logInstall = $(if $(has_log_dir),$(if $(quiet),$(2)$(1)$(installLog) 2>&1,2>&1|tee $(3) $(1)$(installLog)))
logInstallRoot = $(call logInstall,,>)
logInstall* = $(call logInstall,$*/,>)
# Same as logInstall*, but appends to the log
logInstall*Add = $(call logInstall,$*/,>>,-a)
|
273 |
|
|
274 |
# Installs the SQL exports: schema.sql first, then the other exports through
# pg_dump_limit, then grants.sql, all inside the datasource's schema.
# The $(debug) option runs the *.sql import verbosely, to display which
# statements are being run. This should only be used for SQL files that use
# COPY FROM to import data, to avoid echoing pages of insert statements.
# Must come before %/install to override it
sql/install: $(dbExports)
	set -o pipefail; \
	($(inDatasrc); $(if $(wildcard schema.sql),cat schema.sql;) \
	cat $(filter-out schema.sql grants.sql,$+)|pg_dump_limit \
	$(if $(wildcard grants.sql),; cat grants.sql))|"time" env no_search_path=1 \
	$(bin)/psql_$(if $(debug),verbose,script)_vegbien \
	--set=schema='"$(datasrc)"' $(logInstallRoot)
|
285 |
# Cleans up the staging table matched by the stem: runs the table's own
# cleanup.sql when it has one, else bin/csv2db without input. Output is
# appended to the table's install log.
cleanup = set -o pipefail; $(if $(wildcard $*/cleanup.sql),\
($(inDatasrc); cat $*/cleanup.sql) |"time" $(psqlNoSearchPath) --echo-all\
--set=table='"$*"' $(logInstall*Add), (export schema=$(datasrc) table=$*;\
. $(bin)/vegbien_dest; unset schemas; $(bin)/csv2db) $(logInstall*Add))

# Exports the installed staging table's column names as a CSV header line
%/header.csv:
	set -o pipefail; \
	echo 'COPY (SELECT * FROM "$(datasrc)"."$*" LIMIT 0) TO STDOUT CSV HEADER;'| \
	env no_search_path=1 $(bin)/psql_script_vegbien >$*/header.csv

exportHeader = $(selfMake) "$*/header.csv"
|
297 |
|
|
298 |
# Don't try to edit a view. Must come before %/install to override it.
%_view/install: _always ;

# Runs an existing <table>/<name>.sql inside the datasource's schema, from
# within the table's dir, with the table name available as psql variables
%.sql/run: _always
	$(if $(wildcard $(@D)),($(inDatasrc); cat $(@D))|(cd '$(*D)'; "time"\
	env no_search_path=1 ../$(bin)/psql_verbose_vegbien --set=table='"$(*D)"'\
	--set=table_str=\''"$(*D)"'\'))

# For tables with a run script, rewrites postprocess.sql in place through
# bin/repl with the thesaurus
%/postprocess.sql: $(thesaurus) _always
	$(if $(wildcard $*/run),$(bin)/in_place $@ env text=1 $(bin)/repl $<)

# Delegates to the table's run script when present, else runs postprocess.sql
%/postprocess: _always
	$(if $(wildcard $*/run),$*/run postprocess,$(selfMake) "$*/postprocess.sql/run")

%/map_table: _always
	$(if $(wildcard $*/run),$*/run map_table)
|
315 |
|
|
316 |
# Loads the table's concatenated sources with bin/csv2db. For *.src tables,
# row_num is then renamed to "<table>.row_num": this table-scopes the column
# to allow joining the src table with other tables.
define import_install_
set -o pipefail; (. $(bin)/vegbien_dest; unset schemas;\
"time" $(nice) env schema=$(datasrc) table=$* $(bin)/csv2db $(catSrcs) $(logInstall*))
$(if $(filter $(srcTable),$*),($(inDatasrc); echo\
'ALTER TABLE "$(datasrc)"."$*" RENAME row_num TO "$*.row_num";')|"time" $(psqlNoSearchPath)\
--echo-all --set=table='"$*"' $(logInstall*Add))
endef

# For staging tables which are derived by joining together other staging tables.
%/install: %/create.sql _always
	set -o pipefail; \
	($(inDatasrc); echo 'CREATE TABLE "$*" AS'; cat $<; echo ';')| \
	"time" $(psqlNoSearchPath) --echo-all --set=schema='"$(datasrc)"' \
	--set=table='"$*"' $(logInstall*)
	$(exportHeader)
	$(selfMake) "$*/postprocess"
	$(cleanup)

# All other staging tables are loaded from their source files
%/install: _always
	$(import_install_)
	$(exportHeader)
	$(selfMake) "$*/postprocess"
	$(cleanup)
|
339 |
|
|
340 |
# Drops the staging table, or the view when $(is_view) is set
%/uninstall: _always
	echo 'DROP $(if $(is_view),VIEW,TABLE) IF EXISTS "$(datasrc)"."$*" CASCADE;'|$(psqlNoSearchPath)

%/reinstall: _always %/uninstall %/install ;

postprocess: _always $(allTables:%=%/postprocess) ;

cleanup: _always $(tables:%=%/cleanup) ;

# WARNING: This removes any index comments, due to a PostgreSQL bug.
# This occurs because ALTER TABLE recreates the index but not its comment.
%/cleanup: _always
	$(cleanup)
|
354 |
|
|
355 |
##### Maps building |
|
356 |
|
|
357 |
# Maps to (try to) build are added to this
maps :=

srcRoot = $(mappings)/root.sh
# Writes the target from the table's header.csv with bin/src_map, after
# sourcing $(srcRoot)
mkSrcMap = (. $(srcRoot); env datasrc=$(datasrc) $(bin)/src_map <$*/header.csv >$@)

# $(call translate,<dict>): rewrites the first prerequisite in place with
# bin/translate_ci, when <dict> exists
translate = $(if $(wildcard $(1)),$(bin)/in_place $< $(bin)/translate_ci 1 $(1))

# Empty target in case it doesn't exist
$(srcDict):
|
367 |
|
|
368 |
# Via maps cleanup.
# The cleanup rule exists only when a .map.csv.last_cleanup file is an
# explicit goal; otherwise the %/map.csv rule is defined, which requests that
# goal from a sub-make.
ifneq ($(filter %/.map.csv.last_cleanup,$(MAKECMDGOALS)),)
%/.map.csv.last_cleanup: %/map.csv $(vocab) $(thesaurus) $(coreMap) $(srcDict)
	$(call translate,$(srcDict))
	$(bin)/in_place $< $(bin)/canon 1 $(vocab)
	$(call translate,$(thesaurus))
	$(bin)/in_place $< $(bin)/fix_line_endings
	touch $@
	+$(selfMake) $*/unmapped_terms.csv
	+$(selfMake) $*/new_terms.csv
.PRECIOUS: %/.map.csv.last_cleanup
else
# Creates the via map from the exported header when it does not exist yet
define mk_map_csv
+$(selfMake) "$*/header.csv"
$(mkSrcMap)
endef
%/map.csv: _always
	$(if $(wildcard $@),,$(mk_map_csv))
	+$(selfMake) $*/.map.csv.last_cleanup
endif
|
388 |
|
|
389 |
# Tables with a run script get a symlink to the core map (the leading -
# ignores the error raised when the symlink exists). Other tables get the via
# map's first two columns joined with the core map and sorted.
%/VegBIEN.csv: %/map.csv $(coreMap)
	$(if $(wildcard $*/run),-ln -s "../$(coreMap)" "$@",<$< $(bin)/cat_cols 1 2|$(bin)/join $(coreMap)|$(bin)/sort_map >$@)
maps += $(autogenMaps)

maps: $(maps) _always ;

all += $(maps)
|
398 |
|
|
399 |
##### Maps validation |
|
400 |
|
|
401 |
# `tail -n +2`: Remove header before running filter_out_ci because filter_out_ci
# only removes the header if it matches the vocabulary's header.

# Via map input terms that the core map lacks
%/unmapped_terms.csv: %/map.csv $(coreMap)
	tail -n +2 $<|$(bin)/cols 1|$(bin)/filter_out_ci 0 $(coreMap) >$@
	$(bin)/autoremove $@

# Via map rows found neither in the vocabulary, the thesaurus nor (when it
# exists) unmapped_terms.csv, minus rows starting with ":" and rows containing
# UNUSED. The final `exit 0` is needed because grep exits nonzero if nothing
# matches.
newTerms = tail -n +2 $<|$(bin)/filter_out_ci 0 $(vocab)|$(bin)/filter_out_ci 0 $(thesaurus)\
$(if $(wildcard $(word 4,$+)),|$(bin)/filter_out_ci 0 $(word 4,$+)) |grep -vE -e '^"?:'\
-e 'UNUSED' >$@; exit 0 # grep exits nonzero if no match
%/new_terms.csv: %/map.csv $(vocab) $(thesaurus) %/unmapped_terms.csv
	$(newTerms)
	$(bin)/autoremove $@
|
414 |
|
|
415 |
termsSubdirs := $(tables) |
|
416 |
|
|
417 |
include $(root)/lib/mappings.Makefile |
|
418 |
|
|
419 |
##### External dependencies |
|
420 |
|
|
421 |
# Anything under $(root) is built by $(root)'s own makefile
$(root)/%: _always
	+$(subMake)
.PRECIOUS: $(root)/% # let ext. dir's Makefile decide whether to delete on error

##### Mapping

# The map files among the current rule's prerequisites
+maps = $(filter %/map.csv %/VegBIEN.csv $(mappings)/%,$(+_))
# Runs $(root)/map on those maps, reading the stem's staging table and writing
# to VegBIEN under the source name <datasrc>.new
map2db = env in_database=vegbien in_schema=$(datasrc) in_table=$* out_database=vegbien source=$(datasrc).new $(root)/map $(+maps)
|
430 |
|
|
431 |
##### Import to VegBIEN |
|
432 |
|
|
433 |
# Imports every importable table under the temporary source name
import_temp: $(importTables:%=%/import) _always ;

# Publishing removes the temp suffix when done
import: _always import_temp publish validate ;

rm: _always
	echo "SELECT datasource_rm('$(datasrc)');"|"time" $(psqlAsBien)

# import replaces the previous import
reimport: _always import ;
|
441 |
|
|
442 |
# "1" when both $(profile) and $(test) are set
profileTest = $(and $(profile),$(test),1)
# Runs the mapper with its profile sent to fd 3 and summarized by
# bin/profile_stats; errors are ignored
profileOnly = -env profile_to=/dev/fd/3 $(map2db) 3>&1 1>&2| $(bin)/profile_stats /dev/fd/0

# Import log of the stem's table, named after $(version) and, if set, $(n)
log_ = $*/logs/$(if $(n),n=$(n).,)$(version).log.sql

# Displays the import log file path
# Run with `make -s` to avoid echoing make commands
%/log_file: _always
	echo $$(pwd)/$(log_)

trace = $(patsubst %.log.sql,%.trace,$(log_))
# First row number of the last "Partition:" line in the import log
restart_row = $(shell set -x; grep -F Partition: $(log_)|tail -1|$(sed) 's/^.* rows ([[:digit:]]+)-.*$$/\1/')
# The import command: profile only when profiling a test, else a committing
# run of the mapper, appended to the log when $(log) is set
import = $(if $(profileTest),$(profileOnly),(set -x; date; "time" env commit=1 $(if $(profile),profile_to=$(trace))\
$(if $(continue),start=$(restart_row)) $(map2db))$(if $(log), >>$(log_) 2>&1))
|
459 |
|
|
460 |
# Empty during a full import of a datasource marked _no_import, else $(import)
import? = $(if $(call and,$(full_import),$(call dontImport,.)),,$(import))

# During a full import, don't abort on import errors, which often relate to
# invalid input data
%/import_temp: %/VegBIEN.csv _always
	$(if $(full_import),-)$(import?)
# default:
%/import_temp: _always ;

%/import: %/import_temp _always ;
|
469 |
|
|
470 |
#### Taxonomic scrubbing |
|
471 |
|
|
472 |
# Starts $(root)/scrub in the background and prints its PID.
# Using & (background process) also ignores TNRS errors, so that TNRS bugs do
# not prevent the remaining tables from being imported even if TNRS can't be run
scrub: _always
	$(selfMake) $(root)/scrub & echo $$!

# Removes TNRS taxondeterminations
unscrub: _always
	echo "SELECT delete_scrubbed_taxondeterminations('$(datasrc)');"| \
	"time" env no_query_results=1 $(psqlAsBien)

rescrub: _always unscrub scrub ;

import_scrub: _always import scrub ;

reimport_scrub: _always reimport scrub ;

%/import_scrub: _always %/import scrub ;
|
489 |
|
|
490 |
##### Log files from import |
|
491 |
|
|
492 |
# Import logs and profiler traces of all tables, as found when make started
logs := $(wildcard */logs/*.log.sql */logs/*.trace)

rm_logs: _always
	$(RM) $(logs)
|
496 |
|
|
497 |
##### Verification of import |
|
498 |
|
|
499 |
### new-style aggregating validations (also work w/ old-style import) |
|
500 |
|
|
501 |
# must be in input.Makefile instead of table.run because some datasources that |
|
502 |
# we validate still use old-style import |
|
503 |
|
|
504 |
# validations.sql must be in a subdir so it won't get run by sql/install |
|
505 |
validate/install: _always verify/validations.sql/run ;

# Remakes the datasource's diff tables, appending output to the log when
# $(log) is set.
# NOTE(review): the "." before $(log_) presumably relies on $* being empty in
# this explicit rule, which would turn "/logs/..." into "./logs/..." - confirm.
validate: _always
	echo "SELECT remake_diff_tables('$(datasrc)');"|$(psqlAsBien)$(if $(log), >>.$(log_) 2>&1)
|
510 |
|
|
511 |
### old-style aggregating validations (not related to old-style import) |
|
512 |
|
|
513 |
# Every verification that has a reference output
verifyTables := $(patsubst verify/%.ref,%,$(wildcard verify/*.ref))

verify: $(addsuffix /verify,$(verifyTables)) _always ;

# Diffs reference against actual output. The leading - keeps make going on
# verification errors, which are expected during development.
%/verify: verify/%.ref verify/%.out _always
	-$(diffVerbose) $(+_)
# default: without a reference, just show the output.
# Don't run if verify/%.out's default do-nothing action was used. This can't
# use $(wildcard) because it won't recheck the file after verify/%.out is run.
%/verify: verify/%.out _always
	$(if $(shell test -e $< && echo t),cat $<)
|
525 |
|
|
526 |
# psql producing unaligned, tab-separated output without footer, NULLs shown
# as NULL
psqlExport := "time" $(psqlNoSearchPath) --no-align --field-separator=$$'\t' --pset=footer=off --pset=null=NULL
# Runs the prerequisite query inside the datasource's schema and stores the
# result in the target, unless $(reverify) is empty.
# Note that using $(inDatasrc) will not work with datasources whose tables are
# the same name as VegBIEN tables (likely only VegBank), because the datasource
# is first in the search_path.
verify = $(if $(reverify),($(inDatasrc); cat $<)|$(psqlExport) --set=datasource="'$(datasrc)'" >$@)

verify/%.out: verify/%.out.sql _always
	$(verify)
.PRECIOUS: verify/%.out # save partial output in case of error to help debugging
# default:
verify/%.out: _always ;

all += $(wildcard verify/*.out)

%.ref: %.ref.sql
	($(inDatasrc); cat $<)|$(psqlExport) >$@
.PRECIOUS: %.ref # there must always be a .ref for the make rules to work
|
545 |
|
|
546 |
##### Editing import |
|
547 |
|
|
548 |
# Changes the datasource's shortname in the source table to the stem
rename/%: _always
	echo "UPDATE source SET shortname = '$*' WHERE shortname = '$(datasrc)';"|$(psqlAsBien)

# usage: make inputs/src/src.version/publish
%/publish: _always
	echo "SELECT datasource_publish('$*');"|$(psqlAsBien)

# usage: make inputs/src/publish
publish: _always $(datasrc).new/publish ;
|
556 |
|
|
557 |
##### Testing |
|
558 |
|
|
559 |
# $(call testRefOutput,<output>): the row-based output that <output> is
# compared to, i.e. <output> without ".by_col"
testRefOutput = $(subst .by_col,,$(1))
# $(call testRef,<output>): the reference file for <output>
testRef = $(testRefOutput).ref
# Non-empty when the target is its own reference output (filter returns
# non-empty if both names are equal)
hasOwnRef = $(filter $@,$(call testRefOutput,$@))
|
563 |
|
|
564 |
# $(call runTest,<env assignment>): maps $(test_n) rows in test mode into the
# target, then diffs the target against its reference.
# `rm $@`: Remove outputs of successful tests to reduce clutter
# `$(foreach use_staged...)`: Run with use_staged=1
# On a mismatch the output is shown when there is no reference yet. A test
# with its own reference then offers to accept the output via the -ok target.
# A test compared to another test's output prints a note, and its diff
# failure is ignored.
define runTest
@echo "Testing $(abspath $@)..."
>$@ env test=1 n=$(test_n) $(1) $(foreach use_staged,1,$(map2db))
$(if $(hasOwnRef),,-)@(set -x; $(diffIgnoreSpace) $(call testRef,$@) $@) 2>&1 && rm $@\
|| { e=$$?; $(if $(wildcard $(call testRef,$@)),,cat $@;) $(if $(hasOwnRef), {\
read -p $(emph)'Accept new test output? (y/n)'$(endEmph) REPLY; if test "$$REPLY" = y; then\
(set -x; $(MAKE) $@-ok --directory=$(realpath .) --makefile=../input.Makefile); exit 0; fi;\
};, echo $(emph)"Note: The preceding failed test is compared to another test's output"$(endEmph);\
echo $(emph)"When it fails, this always indicates a bug"$(endEmph); ) exit $$e;}
endef
|
585 |
|
|
586 |
# Patterns of the test outputs to build for every table
tests :=

%/test: %/test.xml $(if $(by_col),%/test.by_col.xml) _always ;

# Requires staging tables. To create them, run `make inputs/<datasrc>/install`.
# Non-flat-file inputs fall back to mimicking a successful test
%/test.xml: %/VegBIEN.csv _always
	$(call runTest,by_col=)
tests += %/test.xml

%/test.by_col.xml: %/VegBIEN.csv _always
	$(call runTest,by_col=1)

# Only run column-based tests if column-based mode enabled, because these tests
# are much slower than the row-based tests for small numbers of rows
ifneq ($(by_col),)
tests += %/test.by_col.xml
endif

# Each test pattern instantiated for each table
testOutputs := $(foreach pattern,$(tests),$(patsubst %,$(pattern),$(tables)))

.PRECIOUS: $(testOutputs) # save outputs of failed tests so they can be accepted

test: _always $(testOutputs) ;
|
610 |
|
|
611 |
# Leftover test outputs are removed by `clean`. $(wildcard) takes shell globs,
# in which "%" is an ordinary character, so the table dirs have to be matched
# with "*". test.xml.ref files do not end in .xml and are therefore kept.
all += $(wildcard */test*.xml)
|
612 |
|
|
613 |
# Accepts a test output by making it the new reference:
# make <test_output_path>-ok
%-ok: _always
	mv $* $(call testRef,$*)

# Runs the tests, answering "y" to every prompt
accept-all: _always
	+yes|$(selfMake) test
|
619 |
|
|
620 |
##### Documentation |
|
621 |
|
|
622 |
# Imports 100 rows of the stem's table in column-based test mode at
# verbosity 2, and converts the debug output with bin/debug2redmine
steps = $(selfMake) -s $*/import test=1 by_col=1 verbosity=2 n=100 2>&1|$(bin)/debug2redmine >$@

%/logs/steps.by_col.log.sql: _always
	+$(steps)
|
1 |
selfDir_uZPPqC := $(dir $(lastword $(MAKEFILE_LIST))) |
|
2 |
root := $(selfDir_uZPPqC).. |
|
3 |
include $(root)/lib/common.Makefile |
|
4 |
|
|
5 |
|
|
6 |
##### Configuration |
|
7 |
|
|
8 |
# Command line |
|
9 |
continue ?= |
|
10 |
debug ?= |
|
11 |
full_import ?= |
|
12 |
import_source ?= 1 |
|
13 |
is_view ?= |
|
14 |
log ?= $(if $(test),,1) |
|
15 |
noclobber ?= |
|
16 |
profile ?= |
|
17 |
quiet ?= |
|
18 |
reverify ?= 1 |
|
19 |
schema_only ?= |
|
20 |
use_staged ?= $(by_col) |
|
21 |
|
|
22 |
# Makefile |
|
23 |
exts ?= csv tsv tab txt dat dmp |
|
24 |
test_n ?= 2 |
|
25 |
|
|
26 |
##### Vars/functions |
|
27 |
|
|
28 |
# Paths |
|
29 |
datasrc := $(patsubst .%,%,$(notdir $(realpath .))) |
|
30 |
bin := $(root)/bin |
|
31 |
mappings := $(root)/mappings |
|
32 |
|
|
33 |
# Make |
|
34 |
SHELL := /bin/bash |
|
35 |
selfMake = $(MAKE) --makefile=../input.Makefile |
|
36 |
subMake = $(MAKE) "$(@:$(root)/%=%)" --directory=$(root) |
|
37 |
+_ = $(+:_%=) |
|
38 |
# used to filter the output of embedded $(shell make ...) invocations |
|
39 |
filter_make := grep -vF -e lib -e ".Makefile'." |
|
40 |
addBeforeExt = $(basename $(2))$(1)$(suffix $(2)) |
|
41 |
|
|
42 |
# Terminal |
|
43 |
termCols := $(shell tput cols) |
|
44 |
|
|
45 |
# Commands |
|
46 |
MKDIR = mkdir -p |
|
47 |
mkdir = $(MKDIR) $(@D) |
|
48 |
diff = diff --unified=2 |
|
49 |
diffIgnoreSpace = $(diff) --ignore-space-change |
|
50 |
diffVerbose = $(if $(verbose),diff --side-by-side --left-column\ |
|
51 |
--width=$(termCols),$(diff)) |
|
52 |
|
|
53 |
# BIEN commands |
|
54 |
sortFilenames = $(shell $(bin)/sort_filenames $(1)) |
|
55 |
selfMap = $(bin)/cols 0 0 |
|
56 |
psqlAsBien := $(bin)/psql_verbose_vegbien |
|
57 |
psqlNoSearchPath := env no_search_path=1 $(psqlAsBien) |
|
58 |
# Usage: ($(inDatasrc); cat $(file))|$(psqlCmd) |
|
59 |
inDatasrc := echo 'SET search_path TO "$(datasrc)";' |
|
60 |
|
|
61 |
# SVN |
|
62 |
setSvnIgnore = svn propset svn:ignore $(2) $(1) |
|
63 |
define addDirWithIgnore |
|
64 |
$(addDir) |
|
65 |
$(setSvnIgnore) |
|
66 |
endef |
|
67 |
|
|
68 |
##### Environment |
|
69 |
|
|
70 |
export PATH := $(bin):$(PATH) |
|
71 |
|
|
72 |
##### General targets |
|
73 |
|
|
74 |
all: _always maps ; |
|
75 |
|
|
76 |
clean: _always |
|
77 |
$(RM) $(all) |
|
78 |
|
|
79 |
remake: _always clean |
|
80 |
+$(selfMake) |
|
81 |
# re-run make so that cache of existing files is reset |
|
82 |
|
|
83 |
# Only remake if doesn't exist. This prevents unintentional remaking when the |
|
84 |
# make script is newly checked out from svn (which sets the mod time to now) but |
|
85 |
# the output is synced externally. |
|
86 |
# Can't remove prereq to do this, because it determines when the rule applies. |
|
87 |
make_script = $(if $(wildcard $@),,"time" ./$< >$@) |
|
88 |
|
|
89 |
%/: %/map.csv _always ; |
|
90 |
|
|
91 |
%/: % _always ; |
|
92 |
|
|
93 |
%: %.make |
|
94 |
$(make_script) |
|
95 |
.PRECIOUS: % # save partial outputs of aborted src make scripts |
|
96 |
|
|
97 |
##### Tables discovery |
|
98 |
|
|
99 |
sortFile := import_order.txt |
|
100 |
noImportFile := _no_import |
|
101 |
|
|
102 |
dontImport = $(wildcard $(noImportFile))$(wildcard $(1)/$(noImportFile))$(if\ |
|
103 |
$(import_source),,$(filter Source,$(1))) |
|
104 |
|
|
105 |
ifeq ($(sort_file_updated),)# keep $(sortFile) up-to-date |
|
106 |
$(shell sort_file_updated=1 $(selfMake) $(sortFile)|$(filter_make) >&2) |
|
107 |
export sort_file_updated=1 |
|
108 |
endif |
|
109 |
|
|
110 |
sort_file_tables := $(if $(wildcard $(sortFile)),$(shell cat $(sortFile))) |
|
111 |
# $(shell) replaces "\n" with " " |
|
112 |
tables := $(sort_file_tables) |
|
113 |
allSubdirs := $(call wildcard/,*/) |
|
114 |
allTables := $(call sortFilenames,$(filter-out _% verify logs,$(allSubdirs:%/=%))) |
|
115 |
joinedTables := $(filter-out $(tables),$(allTables)) |
|
116 |
allTables := $(strip $(joinedTables) $(tables))# move joined tables to beginning |
|
117 |
has_visible_files = $(call wildcard/,$(1)/*) |
|
118 |
allTables := $(foreach table,$(allTables),$(if\ |
|
119 |
$(call has_visible_files,$(table)),$(table))) |
|
120 |
ifeq ($(tables),)# none specified in sort file |
|
121 |
tables := $(allTables) |
|
122 |
endif |
|
123 |
importTables := $(foreach table,$(tables),$(if\ |
|
124 |
$(call dontImport,$(table)),,$(table))) |
|
125 |
|
|
126 |
# Prints $(tables), one per line.
# use `make -s` to avoid echoing commands
list_tables: _always
	@for t in $(tables); do echo "$$t"; done

# Rewrites $(sortFile) from list_tables whenever $(tables) contains a table
# that the sort file does not list yet; otherwise the recipe is empty.
$(sortFile): _always
	$(if $(filter-out $(sort_file_tables),$(tables)),$(selfMake) -s list_tables >$@)
# add any missing tables to $(sortFile)
|
132 |
|
|
133 |
##### SVN |
|
134 |
|
|
135 |
# Brace-glob of the per-table files that belong in svn ...
svnFilesGlob := */{,{,.}{data,map,VegBIEN}.csv{,.*},*header.*,*.sql,test.xml.ref}
# ... extended with the datasource-level files
svnFilesGlob := {map.csv,*{grants,validations,~}*.sql,{,*/}{*.{log,make},*terms.csv},$(svnFilesGlob)}
# Files that are added without being passed through the filter in svnFiles
_svnFilesGlob := {_MySQL/{,*.make},{,*/}{$(noImportFile),*{run,Makefile},*.{md5,url},*README.TXT}}
# Expanded on use: matches of svnFilesGlob minus _*, logs/* and *.data.sql,
# followed by all matches of _svnFilesGlob
svnFiles = $(filter-out _% logs/% %.data.sql,$(call wildcard/,$(svnFilesGlob))) \
  $(call wildcard/,$(_svnFilesGlob))
|
140 |
|
|
141 |
# Adds this datasource to svn, unless dontImport excludes the current dir
add: _always $(if $(call dontImport,.),,add!) ;

# Unconditional form of `add`: first adds Source and every table subdir (via
# the prerequisites), then sets the svn ignores, adds the well-known dirs and
# the sort file, and finally adds the individual files.
# To update all inputs with these settings: make inputs/add
add!: _always Source/add $(allTables:%=%/add)
	$(call setSvnIgnore,.,'*')
	$(call addDirWithIgnore,logs,$$'*.gz\n*.log.sql\n*.trace')
	$(call addDirWithIgnore,verify,$$'*.csv\n*.log\n*.out\n*.tsv\n*.txt\n*.xls\n*.xlsx')
	$(call addFile,$(sortFile))
	$(if $(wildcard _MySQL/),$(call addDirWithIgnore,_MySQL,'*'))
	$(if $(wildcard _src/),$(call addDirWithIgnore,_src,'*'))
	$(if $(wildcard _archive/),$(call addDirWithIgnore,_archive,'*'))
# invoke externally to clear $$(wildcard) cache before expanding $$(svnFiles)
	$(selfMake) add_files
|
154 |
|
|
155 |
# Adds every file in $(svnFiles); run in a separate make by add!
add_files: _always
	$(call add*,$(svnFiles))

# Adds a new table subdir, together with its logs subdir
%/add: _always
	$(call addDirWithIgnore,$*,'*')
	$(call addDirWithIgnore,$*/logs,$$'*.gz\n*.log.sql\n*.trace')
|
162 |
|
|
163 |
##### Existing maps discovery |
|
164 |
|
|
165 |
# Patterns matching any of the map/terms files of a table
anyMap := $(addprefix %/,map.csv VegBIEN.csv unmapped_terms.csv new_terms.csv)

# $(exts) passed through ci (defined outside this section), and the matching
# filter patterns
exts := $(call ci,$(exts))
extsFilter := $(addprefix %.,$(exts))
# Usage: $(call dataOnly,<files>): keeps only files with one of $(exts)
dataOnly = $(filter $(extsFilter),$(1))

# Pattern matching the current table's test.* files
anyTest = $*/test.%
# Usage: $(call srcsOnly,<files>): data files that are not maps, tests or logs
srcsOnly = $(filter-out $(anyMap) $(anyTest) %/logs,$(call dataOnly,$(1)))
|
173 |
|
|
174 |
# Datasource-level map file
srcDict := map.csv

# Shared mapping files
vocab := $(mappings)/VegCore.vocab.csv
thesaurus := $(mappings)/VegCore.thesaurus.csv
coreMap := $(mappings)/VegCore-VegBIEN.csv

# One map.csv per table
viaMaps := $(addsuffix /map.csv,$(tables))

# The VegBIEN.csv counterpart of each via map
autogenMaps := $(subst map.,VegBIEN.,$(viaMaps))
# The autogen maps, followed by any other VegBIEN.csv found in a subdir
directMaps := $(autogenMaps) $(filter-out $(autogenMaps),$(wildcard */VegBIEN.csv))
|
185 |
|
|
186 |
##### Sources |
|
187 |
|
|
188 |
# Source data files of the current table ($*), in sortFilenames order
srcs = $(call sortFilenames,$(call srcsOnly,$(wildcard $*/*)))
# The same list without header.csv / header.txt
nonHeaderSrcs = $(filter-out $(addprefix %/header.,csv txt),$(srcs))
# Non-empty when there are no non-header sources:
# empty subdir, so references an already-installed staging table
isRef = $(if $(nonHeaderSrcs),,1)
# Command that runs cat_csv on the non-header sources
catSrcs = $(bin)/cat_csv $(nonHeaderSrcs)
# The same command with $(bin)/with_cat_csv as the program, followed by `--`
withCatSrcs = $(patsubst $(bin)/%,$(bin)/with_%,$(catSrcs)) --
|
194 |
|
|
195 |
# Prints the non-header sources of a table.
# use `make -s` to avoid echoing commands
%/list_srcs: _always
	echo $(nonHeaderSrcs)

# Runs %/cat for every table to import.
# Run with `make -s` to avoid echoing make commands
cat: $(addsuffix /cat,$(importTables)) _always ;

# Runs the cat_csv command on one table's sources
%/cat: _always
	$(catSrcs)
|
203 |
|
|
204 |
##### Input data retrieval |
|
205 |
|
|
206 |
# A .sql file with a .sql.make script is generated by running that script.
# Must come before `%.sql: _MySQL/%.sql` to override it
%.sql: %.sql.make
	$(make_script)

# Converts a data-only MySQL export with my2pg.data, unless the output already
# exists.
# The export must be created with:
# `--compatible=postgresql --add-locks=false --set-charset --no-create-info`
# Must come before `%.sql: _MySQL/%.sql` to override it
%.data.sql: _MySQL/%.data.sql
	$(if $(wildcard $@),,$(bin)/my2pg.data <$< >$@)

# Converts a MySQL export with my2pg, unless the output already exists.
# The export must be created with:
# `--compatible=postgresql --add-locks=false --set-charset`
# Add `--no-data` to create a schema-only export.
%.sql: _MySQL/%.sql
	$(if $(wildcard $@),,$(bin)/my2pg <$< >$@)
|
221 |
|
|
222 |
##### Staging tables installation |
|
223 |
|
|
224 |
# Pattern matching the names of src tables
srcTable := %.src

# Usage: $(call dbExportsWildcard,<glob>)
# Sorted, de-duplicated list of the files matching <glob>, plus the outputs of
# any _MySQL/<glob>.make scripts (with the _MySQL/ prefix and .make suffix
# removed).
dbExportsWildcard = $(sort $(patsubst _MySQL/%.make,%,$(wildcard $(1) _MySQL/$(1).make)))

# schemas first
dbExports := $(call dbExportsWildcard,*schema*.sql)
# add rest of .sql files
ifeq ($(schema_only),)
dbExports += $(filter-out $(dbExports),$(call dbExportsWildcard,*.sql))
endif
# grants.sql is excluded here; += adds extra whitespace, hence the strip
dbExports := $(strip $(filter-out grants.sql,$(dbExports)))
# Everything that has an install step: `sql` if there are DB exports, then
# every table
allInstalls := $(if $(dbExports),sql) $(allTables)

# Expands to t if local.run pg_schema_exists succeeds for the datasource's
# schema, and to nothing otherwise
datasrc_schema_exists = $(shell schema=$(datasrc) \
  $(root)/lib/runscripts/local.run pg_schema_exists && echo t)
|
239 |
|
|
240 |
# Installs the datasource: uses ./run if it exists; otherwise, if table.run
# exists and the schema is not there yet, falls back to install_oldstyle.
# The validations are installed afterwards in every case.
install: _always
	$(if $(wildcard ./run),./run install,$(if $(wildcard table.run),$(if \
	$(datasrc_schema_exists),,+$(selfMake) install_oldstyle)))
	+$(selfMake) validate/install
# table.run: only run this for datasource dirs

# Creates the schema, then installs the DB exports and every table
install_oldstyle: _always schema $(allInstalls:%=%/install) ;

uninstall: _always confirm_rm_schema rm_schema ;
# rm_schema will also drop all staging tables

reinstall: _always uninstall install ;

# Asks for confirmation before the schema is removed, for TNRS only
confirm_rm_schema: _always
	$(if $(filter TNRS,$(datasrc)),$(call confirm,WARNING: This will delete \
	the TNRS cache!,To save it: make backups/TNRS.backup-remake))

schema: _always
	-echo 'CREATE SCHEMA "$(datasrc)";'|$(psqlNoSearchPath)
# ignore errors if schema exists

rm_schema: _always
	echo 'DROP SCHEMA IF EXISTS "$(datasrc)" CASCADE;'|$(psqlNoSearchPath)
|
263 |
|
|
264 |
# Install log path, relative to the dir being installed
installLog := logs/install.log.sql

# Usage: $(call log_dir,<dir prefix>): that dir's logs subdir
log_dir = $(1)logs
# Non-empty when the logs dir exists and $(noclobber) is not set.
# Uses the $(1) of the enclosing $(call logInstall,...).
has_log_dir = $(call and,$(wildcard $(log_dir)),$(call not,$(noclobber)))
# Usage: $(call logInstall,<dir prefix>,<redirect op>,<tee options>)
# Shell redirection that sends a command's stdout and stderr to the install
# log: directly to the file when $(quiet) is set, through tee otherwise.
# Empty when has_log_dir is empty.
logInstall = $(if $(has_log_dir),$(if $(quiet),$(2)$(1)$(installLog) 2>&1,2>&1|tee \
  $(3) $(1)$(installLog)))
# Log in the datasource dir
logInstallRoot = $(call logInstall,,>)
# Log in the current table's dir ($*)
logInstall* = $(call logInstall,$*/,>)
# Same, but append to log
logInstall*Add = $(call logInstall,$*/,>>,-a)
|
273 |
|
|
274 |
# Installs the DB exports into the datasource's schema: schema.sql first (if
# it exists), then the remaining exports piped through pg_dump_limit, then
# grants.sql (if it exists). Everything is fed to one psql invocation.
# Must come before %/install to override it
#
# $debug option runs the *.sql import verbosely, to display which statements are
# being run. this should only be used for SQL files that use COPY FROM to import
# data, to avoid echoing pages of insert statements.
sql/install: $(dbExports)
	set -o pipefail; ($(inDatasrc); $(if $(wildcard schema.sql),cat schema.sql;)\
	cat $(filter-out schema.sql grants.sql,$+)|pg_dump_limit $(if $(wildcard\
	grants.sql),; cat grants.sql))|"time" env no_search_path=1 \
	$(bin)/psql_$(if $(debug),verbose,script)_vegbien --set=schema='"$(datasrc)"' \
	$(logInstallRoot)
|
285 |
# Recipe body that cleans up the current table ($*): runs the table's
# cleanup.sql through psql if it exists, and otherwise runs csv2db with schema
# and table exported. Output is appended to the table's install log.
cleanup = set -o pipefail; \
$(if $(wildcard $*/cleanup.sql),($(inDatasrc); cat $*/cleanup.sql)\
|"time" $(psqlNoSearchPath) --echo-all --set=table='"$*"' $(logInstall*Add),\
(export schema=$(datasrc) table=$*; . $(bin)/vegbien_dest; unset schemas; \
$(bin)/csv2db) $(logInstall*Add))
|
290 |
|
|
291 |
# Exports the column header of the staging table "$*" as a one-line CSV.
# The output is first written to a temp file and only renamed to the target
# when psql succeeded. This rule has no prerequisites, so a truncated
# header.csv left behind by a failed export would otherwise be considered up to
# date and never be regenerated.
%/header.csv:
	set -o pipefail; \
	echo 'COPY (SELECT * FROM "$(datasrc)"."$*" LIMIT 0) TO STDOUT CSV HEADER;'|\
	env no_search_path=1 $(bin)/psql_script_vegbien >$@.tmp \
	|| { e=$$?; $(RM) $@.tmp; exit $$e; }; \
	mv -f $@.tmp $@

# Recipe line that (re)makes the current table's header.csv in a sub-make
exportHeader = $(selfMake) "$*/header.csv"
|
297 |
|
|
298 |
# Don't try to edit a view. Must come before %/install to override it.
%_view/install: _always ;

# Runs the .sql file named by the target's dir part (if it exists) through
# psql, from inside the file's own dir, with the table and table_str psql
# variables set to that dir's name
%.sql/run: _always
	$(if $(wildcard $(@D)),($(inDatasrc); cat $(@D))|(cd '$(*D)';\
	"time" env no_search_path=1 ../$(bin)/psql_verbose_vegbien \
	--set=table='"$(*D)"' --set=table_str=\''"$(*D)"'\'))

# For tables that have a run script: rewrites postprocess.sql in place using
# repl with the thesaurus
%/postprocess.sql: $(thesaurus) _always
	$(if $(wildcard $*/run),$(bin)/in_place $@ env text=1 $(bin)/repl $<)

# Postprocesses a table: via its run script if it has one, otherwise by running
# its postprocess.sql
%/postprocess: _always
	$(if $(wildcard $*/run),$*/run postprocess,$(selfMake) "$*/postprocess.sql/run")

# Only does something for tables that have a run script
%/map_table: _always
	$(if $(wildcard $*/run),$*/run map_table)
|
315 |
|
|
316 |
# For staging tables which are derived by joining together other staging tables.
# Creates the table from the SELECT in create.sql, then exports its header,
# postprocesses it and cleans it up.
%/install: %/create.sql _always
	set -o pipefail; \
	($(inDatasrc); echo 'CREATE TABLE "$*" AS'; cat $<; echo ';')|"time" \
	$(psqlNoSearchPath) --echo-all --set=schema='"$(datasrc)"' --set=table='"$*"' \
	$(logInstall*)
	$(exportHeader)
	$(selfMake) "$*/postprocess"
	$(cleanup)

# Default: loads the table's source files with csv2db, followed by the same
# header export, postprocessing and cleanup steps
%/install: _always
	$(import_install_)
	$(exportHeader)
	$(selfMake) "$*/postprocess"
	$(cleanup)
# First line: pipes the concatenated sources into csv2db.
# Second line (src tables only): renames row_num, to
# table-scope src table's row_num col to allow joining it with other tables
define import_install_
set -o pipefail; (. $(bin)/vegbien_dest; unset schemas; "time" $(nice)\
env schema=$(datasrc) table=$* $(bin)/csv2db $(catSrcs) $(logInstall*))
$(if $(filter $(srcTable),$*),($(inDatasrc);\
echo 'ALTER TABLE "$(datasrc)"."$*" RENAME row_num TO "$*.row_num";')|"time"\
$(psqlNoSearchPath) --echo-all --set=table='"$*"' $(logInstall*Add))
endef
|
339 |
|
|
340 |
# Drops the table's staging table (or view, when $(is_view) is set)
%/uninstall: _always
	echo 'DROP $(if $(is_view),VIEW,TABLE) IF EXISTS "$(datasrc)"."$*" CASCADE;'|$(psqlNoSearchPath)

%/reinstall: _always %/uninstall %/install ;

# Postprocesses all tables
postprocess: _always $(allTables:%=%/postprocess) ;

# Cleans up the tables in $(tables)
cleanup: _always $(tables:%=%/cleanup) ;

# WARNING: This removes any index comments, due to a PostgreSQL bug.
# This occurs because ALTER TABLE recreates the index but not its comment.
%/cleanup: _always
	$(cleanup)
|
354 |
|
|
355 |
##### Maps building |
|
356 |
|
|
357 |
# Maps to (try to) build are added to this
maps :=

# Shell script that is sourced before src_map is run
srcRoot = $(mappings)/root.sh
# Recipe line that generates the target from the current table's header.csv
# using src_map
mkSrcMap = (. $(srcRoot); env datasrc=$(datasrc) $(bin)/src_map <$*/header.csv >$@)

# Usage: $(call translate,<dict file>)
# Recipe line that translates the first prerequisite in place using <dict file>.
# Empty if <dict file> does not exist.
translate = $(if $(wildcard $(1)),$(bin)/in_place $< $(bin)/translate_ci 1 $(1))

# empty target in case it doesn't exist
$(srcDict):
|
367 |
|
|
368 |
# Via maps cleanup
# The cleanup rule is only defined when a .map.csv.last_cleanup file is an
# explicit goal, which is how the %/map.csv rule in the other branch invokes
# it. Otherwise the %/map.csv rule is defined instead.
ifneq ($(filter %/.map.csv.last_cleanup,$(MAKECMDGOALS)),)
# Cleans up map.csv in place (translate, canon, fix_line_endings), records the
# cleanup by touching the target, then remakes the two terms reports
%/.map.csv.last_cleanup: %/map.csv $(vocab) $(thesaurus) $(coreMap) $(srcDict)
	$(call translate,$(srcDict))
	$(bin)/in_place $< $(bin)/canon 1 $(vocab)
	$(call translate,$(thesaurus))
	$(bin)/in_place $< $(bin)/fix_line_endings
	touch $@
	+$(selfMake) $(patsubst %/map.csv,%/unmapped_terms.csv,$<)
	+$(selfMake) $(patsubst %/map.csv,%/new_terms.csv,$<)
.PRECIOUS: %/.map.csv.last_cleanup
else
# Creates map.csv if it does not exist, then runs the cleanup rule on it in a
# sub-make
%/map.csv: _always
	$(if $(wildcard $@),,$(mk_map_csv))
	+$(selfMake) $(patsubst %/map.csv,%/.map.csv.last_cleanup,$@)
# Makes the table's header.csv, then generates the map from it
define mk_map_csv
+$(selfMake) "$*/header.csv"
$(mkSrcMap)
endef
endif
|
388 |
|
|
389 |
# Builds a table's VegBIEN.csv: as a symlink to the core map for tables that
# have a run script, and otherwise by joining columns 1-2 of map.csv with the
# core map and sorting the result
%/VegBIEN.csv: %/map.csv $(coreMap)
	$(if $(wildcard $*/run),-ln -s "../$(coreMap)" "$@"\
	,<$< $(bin)/cat_cols 1 2|$(bin)/join $(coreMap)|$(bin)/sort_map >$@)
# ignore errors if symlink exists
maps += $(autogenMaps)

# Builds all maps
maps: $(maps) _always ;

all += $(maps)
|
398 |
|
|
399 |
##### Maps validation |
|
400 |
|
|
401 |
# `tail -n +2`: Remove header before running filter_out_ci because filter_out_ci
# only removes the header if it matches the vocabulary's header.

# Column 1 of map.csv, minus the terms found in the core map
%/unmapped_terms.csv: %/map.csv $(coreMap)
	tail -n +2 $<|$(bin)/cols 1|$(bin)/filter_out_ci 0 $(coreMap) >$@
	$(bin)/autoremove $@

# map.csv minus the terms found in the vocab, the thesaurus and (if it exists)
# unmapped_terms.csv
%/new_terms.csv: %/map.csv $(vocab) $(thesaurus) %/unmapped_terms.csv
	$(newTerms)
	$(bin)/autoremove $@
# $(word 4,$+) is the unmapped_terms.csv prerequisite of the rule above
newTerms = tail -n +2 $<|$(bin)/filter_out_ci 0 $(vocab)|$(bin)/filter_out_ci 0\
$(thesaurus) $(if $(wildcard $(word 4,$+)),|$(bin)/filter_out_ci 0 $(word 4,$+))\
|grep -vE -e '^"?:' -e 'UNUSED' >$@; exit 0 # grep exits nonzero if no match

# Set before mappings.Makefile is included
termsSubdirs := $(tables)

include $(root)/lib/mappings.Makefile
|
418 |
|
|
419 |
##### External dependencies |
|
420 |
|
|
421 |
# Anything under $(root) is built by running make in $(root)
$(root)/%: _always
	+$(subMake)
# let ext. dir's Makefile decide whether to delete on error
.PRECIOUS: $(root)/%
|
424 |
|
|
425 |
##### Mapping |
|
426 |
|
|
427 |
# The map files among the prerequisites in $(+_)
+maps = $(filter %/map.csv %/VegBIEN.csv $(mappings)/%,$(+_))
# Command that runs $(root)/map on the current table's staging table, with
# vegbien as both the input and the output database and $(datasrc).new as the
# source name
map2db = env in_database=vegbien in_schema=$(datasrc) in_table=$* \
  out_database=vegbien source=$(datasrc).new $(root)/map $(+maps)
|
430 |
|
|
431 |
##### Import to VegBIEN |
|
432 |
|
|
433 |
# Imports every table in $(importTables)
import_temp: $(addsuffix /import,$(importTables)) _always ;

# Full import: import, publish, validate
import: _always import_temp publish validate ; # removes temp suffix when done

# Removes the datasource by calling datasource_rm()
rm: _always
	echo "SELECT datasource_rm('$(datasrc)');"|"time" $(psqlAsBien)

reimport: _always import ; # import replaces the previous import
|
441 |
|
|
442 |
# 1 when both $(profile) and $(test) are set
profileTest = $(if $(profile),$(if $(test),1))
# Runs map2db with its profile sent through fd 3 into profile_stats, and its
# regular stdout sent to stderr. Errors are ignored (leading `-`).
profileOnly = -env profile_to=/dev/fd/3 $(map2db) 3>&1 1>&2| \
  $(bin)/profile_stats /dev/fd/0

# Import log of the current table ($*), named after $(n) (if set) and $(version)
log_ = $*/logs/$(if $(n),n=$(n).,)$(version).log.sql

# Displays the import log file path
# Run with `make -s` to avoid echoing make commands
%/log_file: _always
	echo $$(pwd)/$(log_)

# Profile trace file that goes with the import log
trace = $(patsubst %.log.sql,%.trace,$(log_))
# Row number taken from the last "Partition:" line of the import log
restart_row = $(shell set -x; grep -F Partition: $(log_)|tail -1|$(sed) \
  's/^.* rows ([[:digit:]]+)-.*$$/\1/')
# The import command: profileOnly in profile+test mode; otherwise a committing
# map2db run, optionally profiled and/or continued from restart_row, whose
# output is appended to the import log when $(log) is set
import = $(if $(profileTest),$(profileOnly),(set -x; date; "time" env commit=1 \
  $(if $(profile),profile_to=$(trace)) $(if $(continue),start=$(restart_row)) \
  $(map2db))$(if $(log), >>$(log_) 2>&1))
|
459 |
|
|
460 |
# The import command, or nothing when this is a full import and dontImport
# excludes the datasource dir
import? = $(if $(call and,$(full_import),$(call dontImport,.)),,$(import))

# Imports one table through its VegBIEN.csv map. In a full import, errors are
# ignored (leading `-`).
%/import_temp: %/VegBIEN.csv _always
	$(if $(full_import),-)$(import?)
# don't abort on import errors, which often relate to invalid input data
# default:
%/import_temp: _always ;

%/import: %/import_temp _always ;
|
469 |
|
|
470 |
#### Taxonomic scrubbing |
|
471 |
|
|
472 |
# Starts $(root)/scrub in the background and prints the PID of that process
scrub: _always
	$(selfMake) $(root)/scrub & echo $$!
# using & (background process) also ignores TNRS errors, so that TNRS bugs do
# not prevent the remaining tables from being imported even if TNRS can't be run

# Removes TNRS taxondeterminations
unscrub: _always
	echo "SELECT delete_scrubbed_taxondeterminations('$(datasrc)');"|\
	"time" env no_query_results=1 $(psqlAsBien)

# Combined targets
rescrub: _always unscrub scrub ;

import_scrub: _always import scrub ;

reimport_scrub: _always reimport scrub ;

%/import_scrub: _always %/import scrub ;
|
489 |
|
|
490 |
##### Log files from import |
|
491 |
|
|
492 |
# The tables' import logs and profile traces that exist when make starts
logs := $(wildcard */logs/*.log.sql */logs/*.trace)

# Deletes them
rm_logs: _always
	$(RM) $(logs)
|
496 |
|
|
497 |
##### Verification of import |
|
498 |
|
|
499 |
### new-style aggregating validations (also work w/ old-style import)

# must be in input.Makefile instead of table.run because some datasources that
# we validate still use old-style import

# validations.sql must be in a subdir so it won't get run by sql/install
validate/install: _always verify/validations.sql/run ;

# Remakes the datasource's diff tables. When $(log) is set, the output is
# appended to .$(log_).
# NOTE(review): $* is presumably empty in this explicit rule, which would make
# that path ./logs/<version>.log.sql -- confirm
validate: _always
	echo "SELECT remake_diff_tables('$(datasrc)');"\
	|$(psqlAsBien)$(if $(log), >>.$(log_) 2>&1)
|
510 |
|
|
511 |
### old-style aggregating validations (not related to old-style import) |
|
512 |
|
|
513 |
# Names that have a verify/<name>.ref file
verifyTables := $(patsubst verify/%.ref,%,$(wildcard verify/*.ref))

# Verifies all of them
verify: $(addsuffix /verify,$(verifyTables)) _always ;

# Diffs the reference against the actual output
%/verify: verify/%.ref verify/%.out _always
	-$(diffVerbose) $(+_)
# don't abort on verification errors, which are expected during development
# default:
# Without a .ref, just prints the output, if it exists
%/verify: verify/%.out _always
	$(if $(shell test -e $< && echo t),cat $<)
# don't run if verify/%.out's default do-nothing action was used
# can't use $(wildcard) because it won't recheck file after verify/%.out is run
|
525 |
|
|
526 |
# psql command that prints query results unaligned and tab-separated, without a
# footer and with NULLs shown as NULL
psqlExport := "time" $(psqlNoSearchPath) --no-align --field-separator=$$'\t' \
  --pset=footer=off --pset=null=NULL
# Note that using $(inDatasrc) will not work with datasources whose tables are
# the same name as VegBIEN tables (likely only VegBank), because the datasource
# is first in the search_path.
# Recipe line that runs the first prerequisite (a SQL file) through psqlExport
# into the target. Empty unless $(reverify) is set.
verify = $(if $(reverify),($(inDatasrc); cat $<)|$(psqlExport) \
  --set=datasource="'$(datasrc)'" >$@)

verify/%.out: verify/%.out.sql _always
	$(verify)
# save partial output in case of error to help debugging
.PRECIOUS: verify/%.out
# default:
verify/%.out: _always ;

all += $(wildcard verify/*.out)

# A .ref is generated from its .ref.sql in the same way
%.ref: %.ref.sql
	($(inDatasrc); cat $<)|$(psqlExport) >$@
# there must always be a .ref for the make rules to work
.PRECIOUS: %.ref
|
545 |
|
|
546 |
##### Editing import |
|
547 |
|
|
548 |
# Renames the datasource's entry in the source table to the given name
rename/%: _always
	echo "UPDATE source SET shortname = '$*' WHERE shortname = '$(datasrc)';"\
	|$(psqlAsBien)

# Publishes the named datasource version by calling datasource_publish().
# usage: make inputs/src/src.version/publish
%/publish: _always
	echo "SELECT datasource_publish('$*');"|$(psqlAsBien)

publish: _always $(datasrc).new/publish ; # usage: make inputs/src/publish
|
556 |
|
|
557 |
##### Testing |
|
558 |
|
|
559 |
# Usage: $(call testRefOutput,<test output>)
# The test output whose reference <test output> is compared to: the same name
# with ".by_col" removed
testRefOutput = $(subst .by_col,,$(1))
# Usage: $(call testRef,<test output>): the reference file for that output
testRef = $(call testRefOutput,$(1)).ref
# Non-empty when the target is compared to its own reference.
# filter returns non-empty if they are equal
hasOwnRef = $(filter $@,$(call testRefOutput,$@))
|
563 |
|
|
564 |
# Usage: $(call runTest,<env var assignments>)
# Recipe that runs map2db in test mode into the target, then diffs the target
# against its reference.
# `rm $@`: Remove outputs of successful tests to reduce clutter
# `$(foreach use_staged...)`: Run with use_staged=1
# On a diff failure, the output is printed if there is no reference file. When
# the target has its own reference, the user is asked whether to accept the new
# output (which runs the <target>-ok rule); otherwise a note is printed that
# the comparison is against another test's output. The recipe then exits with
# diff's status, unless the output was accepted.
define runTest
@echo "Testing $(abspath $@)..."
>$@ env test=1 n=$(test_n) $(1) $(foreach use_staged,1,$(map2db))
$(if $(hasOwnRef),,-)@(set -x; $(diffIgnoreSpace) $(call testRef,$@) $@) 2>&1\
&& rm $@ || { e=$$?; $(if $(wildcard $(call testRef,$@)),,cat $@;)\
$(if $(hasOwnRef),\
{\
read -p $(emph)'Accept new test output? (y/n)'$(endEmph) REPLY;\
if test "$$REPLY" = y; then\
(set -x; $(MAKE) $@-ok --directory=$(realpath .) --makefile=../input.Makefile);\
exit 0;\
fi;\
};,\
echo $(emph)"Note: The preceding failed test is compared to another test's\
output"$(endEmph);\
echo $(emph)"When it fails, this always indicates a bug"$(endEmph);\
)\
exit $$e;}
endef
|
585 |
|
|
586 |
# Target patterns of the tests to run for each table
tests :=

# Runs a table's tests (the column-based one only if $(by_col) is set)
%/test: %/test.xml $(if $(by_col),%/test.by_col.xml) _always ;

# Row-based test.
# Requires staging tables. To create them, run `make inputs/<datasrc>/install`.
# Non-flat-file inputs fall back to mimicking a successful test
%/test.xml: %/VegBIEN.csv _always
	$(call runTest,by_col=)
tests += %/test.xml

# Column-based test
%/test.by_col.xml: %/VegBIEN.csv _always
	$(call runTest,by_col=1)

# Only run column-based tests if column-based mode enabled, because these tests
# are much slower than the row-based tests for small numbers of rows
ifneq ($(by_col),)
tests += %/test.by_col.xml
endif

# Every test pattern applied to every table
testOutputs := $(foreach testPattern,$(tests),$(patsubst %,$(testPattern),$(tables)))

# save outputs of failed tests so they can be accepted
.PRECIOUS: $(testOutputs)

test: _always $(testOutputs) ;
|
610 |
|
|
611 |
# Existing test outputs in the table subdirs.
# Note that $(wildcard) takes shell globs: `%` is not a wildcard there, so the
# subdir has to be matched with `*`.
all += $(wildcard */test*.xml)
|
612 |
|
|
613 |
# Accepts a test output by moving it over its reference file.
# Usage: make <test_output_path>-ok
%-ok: _always
	mv $* $(call testRef,$*)

# Runs the tests with `yes` piped into them, which answers every
# "Accept new test output?" prompt with y
accept-all: _always
	+yes|$(selfMake) test
|
619 |
|
|
620 |
##### Documentation |
|
621 |
|
|
622 |
# Recipe line that runs a 100-row, column-based test import of the current
# table at verbosity 2 and converts its output with debug2redmine into the
# target
steps = $(selfMake) -s $*/import test=1 by_col=1 verbosity=2 n=100 \
  2>&1|$(bin)/debug2redmine >$@

%/logs/steps.by_col.log.sql: _always
	+$(steps)
Also available in: Unified diff
fix: *Makefile: changed line endings to \n so that `patch` can work with pasted input. use `svn di --extensions --ignore-eol-style` to verify no diff.