Project

General

Profile

« Previous | Next » 

Revision 9564

Added inputs/GBIF/_MySQL/MySQL.data.sql.run, with helper functions for resuming the import to MySQL from where it left off. This is very useful if the import is interrupted for any reason, because otherwise the entire import would have to be run again from the start, taking 40-50 hours. import_resume_pos() uses the new binsearch() to find where in the file the import left off, based on which pkeys have already been imported. (GBIF pkeys are unfortunately not in any order in the input file, nor are they in insertion order in the imported table, because MySQL instead clusters the table by the pkey. This necessitates a much more complex solution to resuming a partial import.)

View differences:

inputs/GBIF/_MySQL/MySQL.data.sql.run
1
#!/bin/bash -e
2
# Source the table_dir runscript, located relative to this script's directory.
# NOTE(review): presumably this is what provides helpers such as echo_func,
# kw_params and mk_table_esc used in this file -- confirm.
. "$(dirname "${BASH_SOURCE[0]}")"/../../../lib/runscripts/table_dir.run
3
# Source the binary-search library, located relative to this script's
# directory; binsearch is called by import_resume_pos().
. "$(dirname "${BASH_SOURCE[0]}")"/../../../lib/sh/binsearch.sh
4

  
5
if self_not_included; then
6

  
7

  
8
#### import resuming
9

  
10

  
11
### is_pkey_imported()
12

  
13
# Name of the table whose primary keys are checked by the import-resume
# helpers. NOTE(review): the functions reference $table_esc, presumably derived
# from this variable by mk_table_esc -- confirm.
table=raw_occurrence_record
14

  
15
# Prints the number of rows in $table_esc whose id equals $pkey, by running a
# SELECT COUNT(*) query through mysql_ANSI. Prints 0 without querying when
# $pkey is empty.
is_pkey_imported__int() # usage: pkey=# is_pkey_imported__int
16
{
17
	# `${pkey?}` makes the shell report an error if pkey is unset (an empty
	# value is still accepted and handled on the next line).
	# NOTE(review): mk_table_esc presumably sets $table_esc -- confirm.
	echo_func; kw_params pkey; : "${pkey?}"; mk_table_esc
18
	# empty pkey: emit 0 (via the log++/echo_run wrappers) and skip the query
	test "$pkey" || { log++ echo_run echo 0; return; }
19
	
20
	# NOTE(review): use_local_remote is defined elsewhere; presumably it selects
	# which MySQL server the query below goes to -- confirm.
	use_local_remote
21
	# NOTE(review): data_only=1 presumably makes mysql_ANSI print just the value
	# with no header -- confirm. $pkey is interpolated directly into the SQL.
	data_only=1 mysql_ANSI <<<"SELECT COUNT(*) FROM $table_esc WHERE id = $pkey"
22
}
23

  
24
# NOTE(review): func_override is defined elsewhere; presumably it keeps the
# is_pkey_imported__int defined so far available as
# is_pkey_imported__int__no_cache, which the wrapper below calls -- confirm.
func_override is_pkey_imported__int__no_cache
25
# Caching wrapper: prints the cached value for the current pkey/arguments,
# calling the __no_cache implementation only when nothing is cached.
is_pkey_imported__int() # caches the last result for efficiency
26
{
27
	# the cache key combines the declaration of pkey (`declare -p`) with the
	# positional arguments
	local cache_key="$(declare -p pkey) $*"; load_cache
28
	# on a cache miss, run ${FUNCNAME}__no_cache with the same arguments and
	# store its output; return early if save_cache reports failure
	if ! cached; then save_cache "$(${FUNCNAME}__no_cache "$@")" || return; fi
29
	echo_cached_value
30
}
31

  
32
# Captures the output of is_pkey_imported__int and hands it to int2bool, which
# is the last command and therefore supplies this function's exit status.
# pkey is not passed explicitly here; is_pkey_imported__int reads it from the
# calling scope. `local int` is declared separately from the assignment so the
# command substitution's exit status is not masked by `local`.
is_pkey_imported()
33
{ echo_func; local int; int="$(is_pkey_imported__int)"; int2bool "$int"; }
34

  
35

  
36
# Prints the leading numeric value (the pkey) of the first
# "INSERT INTO <table> VALUES (" line found in $top_file when reading from
# byte offset $i onward (`tail -c +"$i"`), then stops reading.
get_pkey_at_pos() # usage: i=# get_pkey_at_pos
37
{
38
	# `${i?}` makes the shell report an error if i is unset.
	# NOTE(review): the sed pattern below is written in extended-regex style
	# (`\(` as a literal paren, `(...)` as the capture group, `+` as a
	# quantifier) but no -r/-E flag is visible here; presumably sed is wrapped
	# or aliased elsewhere to enable extended regexes -- confirm.
	# The sed program prints only the captured number (-n with s//\1/p) and
	# quits after the first matching line.
	echo_func; log++; kw_params i; : "${i?}"; mk_table_esc
39
	piped_cmd echo_run tail -c +"$i" "$top_file"\
40
	|echo_run sed -n '/^INSERT INTO '"$table_esc"' VALUES \(([0-9]+),.*$/{
41
s//\1/p
42
q # stop after first match
43
}'
44
}
45

  
46
# Looks up the pkey found by get_pkey_at_pos (which reads $i from the calling
# scope) and then runs is_pkey_imported on it; the exit status is that of
# is_pkey_imported. pkey is a local here and is visible to is_pkey_imported
# through the shell's dynamic scoping. This function is the predicate handed
# to binsearch by import_resume_pos().
is_pkey_at_pos_imported()
47
{ echo_func; local pkey; pkey="$(get_pkey_at_pos)"; is_pkey_imported; }
48

  
49
# Runs binsearch with is_pkey_at_pos_imported as the test function, over the
# range min..max. min defaults to 0 and max defaults to the size of $top_file.
# NOTE(review): binsearch is defined in the sourced binsearch.sh; presumably it
# reads min/max from this scope, sets $i for the predicate, and outputs the
# resume position -- confirm.
import_resume_pos() # usage: [min=#] [max=#] import_resume_pos
50
{
51
	# use 0 as the lower bound when the caller did not set min
	echo_func; kw_params min max; local min="${min-0}"
52
	# use the file size as the upper bound when the caller did not set max
	if ! isset max; then local max; max="$(file_size "$top_file")"; fi
53
	binsearch is_pkey_at_pos_imported
54
}
55

  
56
fi
0 57

  

Also available in: Unified diff