Revision 9564
Added by Aaron Marcuse-Kubitza over 11 years ago
inputs/GBIF/_MySQL/MySQL.data.sql.run | ||
---|---|---|
1 |
#!/bin/bash -e |
|
2 |
. "$(dirname "${BASH_SOURCE[0]}")"/../../../lib/runscripts/table_dir.run |
|
3 |
. "$(dirname "${BASH_SOURCE[0]}")"/../../../lib/sh/binsearch.sh |
|
4 |
|
|
5 |
if self_not_included; then |
|
6 |
|
|
7 |
|
|
8 |
#### import resuming |
|
9 |
|
|
10 |
|
|
11 |
### is_pkey_imported() |
|
12 |
|
|
13 |
# Name of the table that the import-resume helpers below query.
# NOTE(review): presumably read by mk_table_esc to produce $table_esc - confirm in the sourced library.
table=raw_occurrence_record |
|
14 |
|
|
15 |
# Prints the result of SELECT COUNT(*) for rows of $table_esc whose id equals $pkey.
# Input: pkey (keyword param; must be set, but may be empty).
# An empty pkey prints 0 and returns without running the query.
is_pkey_imported__int() # usage: pkey=# is_pkey_imported__int |

16 |
{ |

17 |
# ${pkey?} aborts if pkey is unset; mk_table_esc is expected to provide $table_esc used in the query below
echo_func; kw_params pkey; : "${pkey?}"; mk_table_esc |

18 |
# empty pkey: report a count of 0 instead of sending an invalid query
test "$pkey" || { log++ echo_run echo 0; return; } |

19 |


20 |
# NOTE(review): presumably selects which database connection settings are used - confirm in the sourced library
use_local_remote |

21 |
# $pkey is interpolated into the SQL unquoted, so it must be a plain integer
data_only=1 mysql_ANSI <<<"SELECT COUNT(*) FROM $table_esc WHERE id = $pkey" |

22 |
} |
|
23 |
|
|
24 |
# NOTE(review): func_override presumably keeps the definition above available under the name
# is_pkey_imported__int__no_cache, which the wrapper below calls via ${FUNCNAME}__no_cache - confirm in the sourced library.
func_override is_pkey_imported__int__no_cache |

25 |
# Caching wrapper: prints the cached value, calling the __no_cache version first only when `cached` reports a miss.
# Input: pkey (read from the caller's scope) and any positional args; both go into the cache key.
is_pkey_imported__int() # caches the last result for efficiency |

26 |
{ |

27 |
# the cache key combines the full declaration of pkey with the positional args
local cache_key="$(declare -p pkey) $*"; load_cache |

28 |
# on a miss, run the un-cached function with the same args and store its output; if that fails, return the failure status
if ! cached; then save_cache "$(${FUNCNAME}__no_cache "$@")" || return; fi |

29 |
echo_cached_value |

30 |
} |
|
31 |
|
|
32 |
# Runs is_pkey_imported__int (which reads pkey from the caller's scope) and passes its output to int2bool.
# The exit status is that of int2bool.
# NOTE(review): presumably int2bool succeeds for a nonzero count - confirm in the sourced library.
is_pkey_imported() |

33 |
{ echo_func; local int; int="$(is_pkey_imported__int)"; int2bool "$int"; } |
|
34 |
|
|
35 |
|
|
36 |
# Prints the leading numeric value (the pkey) of the first "INSERT INTO <table> VALUES (<digits>,..." line
# found in $top_file when reading from byte offset $i onward. Prints nothing if no such line is found.
# Input: i (keyword param; must be set); also reads $top_file from the enclosing scope.
get_pkey_at_pos() # usage: i=# get_pkey_at_pos |

37 |
{ |

38 |
# ${i?} aborts if i is unset; mk_table_esc is expected to provide $table_esc used in the sed pattern below
echo_func; log++; kw_params i; : "${i?}"; mk_table_esc |

39 |
# tail -c +"$i" emits the file starting at byte $i; sed prints only the captured digits of the first matching line, then quits.
# The ^ anchor means a match must begin at the start of a line as seen after the offset.
# NOTE(review): the pattern uses extended-regex syntax (\( as a literal paren, ( ) as the group, + as a quantifier);
# this assumes sed runs in ERE mode here (e.g. via an alias from the sourced library) - confirm.
piped_cmd echo_run tail -c +"$i" "$top_file"\ |

40 |
|echo_run sed -n '/^INSERT INTO '"$table_esc"' VALUES \(([0-9]+),.*$/{ |

41 |
s//\1/p |

42 |
q # stop after first match |

43 |
}' |

44 |
} |
|
45 |
|
|
46 |
# Fetches the pkey found at position $i (get_pkey_at_pos reads i from the caller's scope) and
# tests it with is_pkey_imported, whose exit status becomes this function's status.
# pkey is declared separately from its assignment so that `local` does not mask the substitution's exit status.
is_pkey_at_pos_imported() |

47 |
{ echo_func; local pkey; pkey="$(get_pkey_at_pos)"; is_pkey_imported; } |
|
48 |
|
|
49 |
# Runs binsearch with is_pkey_at_pos_imported as its argument to locate where the import left off in $top_file.
# Inputs (keyword params): min - defaults to 0 when unset; max - defaults to the size of $top_file when not set.
# NOTE(review): presumably binsearch reads min/max from this scope and sets i for each probe - confirm in lib/sh/binsearch.sh.
import_resume_pos() # usage: [min=#] [max=#] import_resume_pos |

50 |
{ |

51 |
# ${min-0} substitutes 0 only when min is unset (an empty value is kept as-is)
echo_func; kw_params min max; local min="${min-0}" |

52 |
# default the upper bound to the file size
if ! isset max; then local max; max="$(file_size "$top_file")"; fi |

53 |
binsearch is_pkey_at_pos_imported |

54 |
} |
|
55 |
|
|
56 |
fi |
|
0 | 57 |
Also available in: Unified diff
Added inputs/GBIF/_MySQL/MySQL.data.sql.run, with helper functions for resuming the import to MySQL from where it left off. This is very useful if the import is interrupted for any reason, because otherwise the entire import would have to be run again from the start, taking 40-50 hours. import_resume_pos() uses the new binsearch() to find where in the file the import left off, based on which pkeys have already been imported. (GBIF pkeys are unfortunately not in any order in the input file, nor are they in insertion order in the imported table, because MySQL instead clusters the table by the pkey. This necessitates a much more complex solution to resuming a partial import.)