1
|
<?php
|
2
|
#############################################################################
|
3
|
# Driver file for taiper2flatClient, a simple script to
|
4
|
# consume TAPIR web services and save the results in a delimited file,
|
5
|
# one record per line. The current delimiter is backtick '`' but
|
6
|
# that can be changed by changing the $seperator variable located
|
7
|
# in configurableParams.php.
|
8
|
#
|
9
|
# Command line usage:
|
10
|
# php taiper2flat.php
|
11
|
#
|
12
|
# dependencies:
|
13
|
# php 5+
|
14
|
# pear
|
15
|
# HTTP_Request
|
16
|
# I had to install pear and HTTP_Request before it would work (on Ubuntu):
|
17
|
# sudo apt-get install php-http-request
|
18
|
# This should install all dependencies, but just in case, the full
|
19
|
# dependency list is:
|
20
|
# php-net-socket
|
21
|
# php-net-url
|
22
|
# php-pear
|
23
|
# php-http-request
|
24
|
#############################################################################
|
25
|
|
26
|
require_once('HTTP/Request.php'); // pear package
|
27
|
require_once('tapirRequestTemplate.php');
|
28
|
require_once('getAllConcepts.php');
|
29
|
|
30
|
// $url and $seperator are read below but never assigned in this file.
// The header comment says $seperator is located in configurableParams.php;
// NOTE(review): $url presumably comes from the same place - confirm.
// The `global` statements only have an effect if this file is included from
// inside a function; at file scope they are no-ops.
global $url;
global $seperator;

// Working files, resolved relative to the current working directory.
$flat_filename = "specieslink.txt";      // delimited output, one record per line
$record_num_filename = "recordnum.dat";  // position of the next record to request
$error_log_filename = "error.log";       // append-only failure log

// The keys of this array are used as the column names of the flat file and
// as the element names looked up in every returned record.
$supportedConcepts = getAllConcepts();
if (!is_array($supportedConcepts)) {
    // Append in a single call so an unwritable log produces one warning
    // instead of a follow-up error from writing to an invalid handle.
    file_put_contents(
        $error_log_filename,
        "Unable to retrieve supported concepts from service, failing.\n",
        FILE_APPEND
    );
    // Non-zero status lets a calling shell or cron job detect the failure.
    exit(1);
}
|
44
|
|
45
|
// Number of failed requests in a row; the main loop quits after three.
$successive_failures = 0;

// Record to begin first request (ordinal position, not ID). When a position
// was saved by an earlier run, resume from it. The saved value is trimmed and
// validated so that an empty or damaged file falls back to 0 instead of
// feeding a non-numeric string into the request and the loop comparison.
$start_record = 0;
if (file_exists($record_num_filename)) {
    $saved_position = file_get_contents($record_num_filename);
    if ($saved_position !== false && is_numeric(trim($saved_position))) {
        $start_record = (int) trim($saved_position);
    }
}

$request_limit = 1000; // Number of records per request; cannot exceed 1000

// According to TAPIR docs, the estimate from the service
// is an upper bound on the number of records. Start out high.
$estimated_max_from_service = 4000000;

// Write the column header only when the output file does not exist yet, so a
// resumed run keeps appending below the existing header.
if (!file_exists($flat_filename)) {
    // implode() places the separator between names only, which stays correct
    // for separators of any length (chopping one trailing byte did not).
    $file_header = implode((string) $seperator, array_keys($supportedConcepts)) . "\n";
    file_put_contents($flat_filename, $file_header, FILE_APPEND);
}

$finished = false; // set by the main loop once the service has no more records
$itrNum = 0;       // number of requests attempted so far

// A single request object is reused for every page.
$http_request = new HTTP_Request();
|
74
|
|
75
|
// Reason phrases used only when logging a non-200 response.
$http_status_labels = array(
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Found',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Timeout',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Long',
    415 => 'Unsupported Media Type',
    416 => 'Requested Range Not Satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Timeout',
    505 => 'HTTP Version Not Supported',
);

// Column names, in output order.
$concept_names = array_keys($supportedConcepts);

// Text blanked out of every field value: the separator itself, commas and
// double quotes. Plain string replacement is used so that a separator such
// as '|' or '.' is never interpreted as a regular expression.
$unsafe_field_text = array(',', '"');
if ((string) $seperator !== '') {
    array_unshift($unsafe_field_text, (string) $seperator);
}

// Page through the service, $request_limit records at a time. Each successful
// page is appended to the flat file and the position of the next page is
// saved, so an interrupted run can resume. Any failure repeats the same
// request; three failures in a row abort the script.
while (!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record, $request_limit, $supportedConcepts);

    $http_request->setMethod('POST');
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData($body);
    $http_request->setURL($url);
    $http_request->_timeout = 300;
    // PEAR documents readTimeout as array(seconds, microseconds).
    $http_request->_readTimeout = array(300, 0);

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $send_result = $http_request->sendRequest();

    // Every problem found with this page is collected here; a non-empty list
    // means the page failed and the same request is repeated.
    $failure_messages = array();
    $summary_node = null;

    if ($send_result instanceof PEAR_Error) {
        // The request object is reused across iterations, so the response
        // accessors are not consulted after a failed send.
        $failure_messages[] = "Request failed: " . $send_result->getMessage() . "\n";
    } else {
        $response = $http_request->getResponseBody();
        $code = $http_request->getResponseCode();

        if ($code != 200) { // 200 = OK
            $label = isset($http_status_labels[$code])
                ? $http_status_labels[$code]
                : 'Unknown Error';
            $failure_messages[] = "Service responded with HTTP " . $code . " code: " . $label . "\n";
        } else {
            // Weird encoding bug - the ^F (0x06) and ^L (0x0C) control
            // characters make the XML parser choke, so they are replaced by
            // the text "6" and "12" before parsing.
            $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);

            $xmlDoc = new DOMDocument();
            // An empty body is left unparsed (loadXML('') raises an error on
            // PHP 8); it is then reported through the missing-summary check.
            if (is_string($response) && $response !== '') {
                $xmlDoc->loadXML($response);
            }

            $errors = $xmlDoc->getElementsByTagName("error");
            if ($errors->length > 0) {
                foreach ($errors as $error) {
                    $failure_messages[] = $error->nodeValue . "\n";
                }
            } else {
                $summary = $xmlDoc->getElementsByTagName("summary");
                if ($summary->length == 0) {
                    $failure_messages[] = "No summary node, assuming there's a missed error.\n";
                } else {
                    $summary_node = $summary->item(0);
                }
            }
        }
    }

    if (count($failure_messages) > 0) {
        $successive_failures += 1;
        $give_up = ($successive_failures >= 3);
        if ($give_up) {
            $failure_messages[] = "3 successive failures, quitting\n";
        }
        file_put_contents($error_log_filename, implode('', $failure_messages), FILE_APPEND);
        if ($give_up) {
            // Non-zero status so the caller can tell the run did not complete.
            exit(1);
        }

        // Should repeat the same request
        continue;
    }

    // Flatten every record into one line: one field per supported concept,
    // empty when the record has no element of that name.
    $records = $xmlDoc->getElementsByTagName("record");
    $recordsStr = '';
    foreach ($records as $record) {
        $fields = array();
        foreach ($concept_names as $key) {
            $val = '';
            $element = $record->getElementsByTagName($key);
            if ($element->length > 0) {
                $val = str_replace($unsafe_field_text, ' ', $element->item(0)->nodeValue);
            }
            $fields[] = $val;
        }
        // Joining (instead of trimming one trailing byte) keeps the line
        // correct for separators of any length.
        $recordsStr .= implode((string) $seperator, $fields) . "\n";
    }

    file_put_contents($flat_filename, $recordsStr, FILE_APPEND);

    $successive_failures = 0;

    // "next" and "totalMatched" are read from the summary element. When
    // "next" is missing or not numeric the position is advanced by the number
    // of records just written, so the resume file never holds an empty value.
    // NOTE(review): a missing "next" is treated as "no more records", which
    // is how the TAPIR summary is understood to work - confirm.
    $next_attr = $summary_node->getAttribute("next");
    $has_next = is_numeric($next_attr);
    $start_record_temp = $has_next
        ? (int) $next_attr
        : (int) $start_record + $records->length;

    // Keep the previous estimate when the service does not report a total,
    // otherwise the loop condition would compare against an empty string.
    $total_attr = $summary_node->getAttribute("totalMatched");
    if (is_numeric($total_attr)) {
        $estimated_max_from_service = (int) $total_attr;
    }

    // Overwrite the saved position only after the page has been written.
    file_put_contents($record_num_filename, (string) $start_record_temp);
    $start_record = $start_record_temp;

    // If number of records is less than request_limit, it means
    // the service is out of records.
    $finished = !$has_next || ($request_limit > $records->length);

    if ($itrNum == 1) {
        print "Extimated number of records: $estimated_max_from_service\n";
    } elseif ($itrNum % 10 == 0) {
        print "Pulled $start_record records out of $estimated_max_from_service " .
            "estimated total records.\n";
    }
}
|
244
|
|
245
|
?>
|
246
|
|