<?php
#############################################################################
# Driver file for taiper2flatClient, a simple script to
# consume TAPIR web services and save the results in a delimited file,
# one record per line.  The current delimiter is a comma ',' but
# that can be changed by changing the $seperator variable located
# in configurableParams.php.
#
# Command line usage:
#   php taiper2flat.php
#
# Dependencies:
# php 5+
# pear
# HTTP_Request
# I had to install pear and HTTP_Request before it would work (on Ubuntu):
#   sudo apt-get install php-http-request
# This should install all dependencies, but just in case, the full
# dependency list is:
#   php-net-socket
#   php-net-url
#   php-pear
#   php-http-request
#############################################################################

// Third-party HTTP client (PEAR package) used to POST the search requests.
require_once 'HTTP/Request.php';
// Project helpers. buildRequest() and getAllConcepts() are called further
// down and presumably come from these two files -- TODO confirm.
require_once 'tapirRequestTemplate.php';
require_once 'getAllConcepts.php';

// Settings this script reads but never assigns. The header comment places
// $seperator in configurableParams.php; $url is presumably defined alongside
// it -- TODO confirm.
global $url, $seperator;

// Files this script works with, all relative to the working directory.
$flat_filename       = "specieslink.specimens.csv"; // delimited output, appended to
$record_num_filename = "recordnum.dat";             // position to resume from
$error_log_filename  = "error.log";                 // appended to on each failure

// The ASCII control characters (codes 0-31) that are stripped from every
// response before it is parsed as XML. Tab, newline and carriage return are
// left alone.
$ctrl_chars = array_values(array_diff(
    array_map("chr", range(0, 31)),
    array("\t", "\n", "\r")
));

// Concepts to harvest. The keys of this array become the CSV column names
// and are also the element names looked up inside every record.
// getAllConcepts() is defined elsewhere; judging by the failure message it
// obtains the list from the service.
$supportedConcepts = getAllConcepts();
if (!is_array($supportedConcepts)) {
  // Nothing can be harvested without the concept list: log and abort.
  file_put_contents(
    $error_log_filename,
    "Unable to retrieve supported concepts from service, failing.\n",
    FILE_APPEND
  );
  // Exit with a non-zero status so that shells, cron jobs and wrapper
  // scripts can tell the run failed (a bare `exit` reports success).
  exit(1);
}

// Count of consecutive failed requests; set back to 0 after every page that
// is processed successfully.
$successive_failures = 0;

// Record to begin the first request at (ordinal position, not ID). Each
// successfully processed page stores the next position in
// $record_num_filename, so an interrupted harvest resumes where it stopped.
$start_record = 0;
if (file_exists($record_num_filename)) {
  // Accept only a plain non-negative integer. An unreadable file (false), an
  // empty file or stray whitespace must not leak into the request or into
  // the numeric loop condition; in those cases start from 0.
  $saved_position = trim((string) file_get_contents($record_num_filename));
  if (ctype_digit($saved_position)) {
    $start_record = (int) $saved_position;
  }
}

// Number of records per request; cannot exceed 1000.
$request_limit = 1000;

// According to the TAPIR docs, the estimate from the service is an upper
// bound on the number of records. Start out high so the first request is
// always made.
$estimated_max_from_service = 4000000;

// First run only: create the output file and write the column-header row.
// The header is written with fputcsv(), exactly like every data row, so it
// gets the same delimiter, quoting and line ending as the records below it
// (the hand-built version trimmed a fixed single byte and never quoted).
if (!file_exists($flat_filename)) {
  $flat_file = fopen($flat_filename, "a");
  if ($flat_file === false) {
    // Without an output file there is nowhere to put the records.
    file_put_contents(
      $error_log_filename,
      "Unable to open output file ".$flat_filename." for writing, failing.\n",
      FILE_APPEND
    );
    exit(1);
  }
  fputcsv($flat_file, array_keys($supportedConcepts), $seperator);
  fclose($flat_file);
}

// ---------------------------------------------------------------------------
// Main harvest loop: request one page of records at a time, append it to the
// flat file, store the position of the next page, and repeat until the
// service runs out of records.
// ---------------------------------------------------------------------------
$finished = false;  // becomes true once the service has no more records
$itrNum = 0;        // requests attempted so far, failed ones included

// Column order of every output row; computed once, it never changes.
$concept_names = array_keys($supportedConcepts);

// Reason phrases used when logging a response whose status is not 200.
$http_status_labels = array(
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Found',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Timeout',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Long',
    415 => 'Unsupported Media Type',
    416 => 'Requested Range Not Satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Timeout',
    505 => 'HTTP Version Not Supported',
);

$http_request = new HTTP_Request();

while (!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record, $request_limit, $supportedConcepts);

    $http_request->setMethod('POST');
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData($body);
    $http_request->setURL($url);
    $http_request->_timeout = 300;
    // HTTP_Request documents the read timeout as array(seconds, microseconds);
    // a bare integer is not a valid value for it.
    $http_request->_readTimeout = array(300, 0);

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    // Every failure branch below logs and then `continue`s, which repeats the
    // same request because $start_record has not been advanced. The cap of
    // three successive failures that used to sit in these branches is
    // disabled, so a failing request is retried indefinitely.
    // NOTE(review): the retry is immediate; consider a short pause so a
    // service outage does not flood the error log.

    if ($code != 200) { // 200 = OK
        $label = isset($http_status_labels[(int) $code])
            ? $http_status_labels[(int) $code]
            : 'Unknown Error';
        file_put_contents(
            $error_log_filename,
            "Service responded with HTTP ".$code." code: ".$label."\n",
            FILE_APPEND
        );
        continue;
    }

    // Drop the control characters listed in $ctrl_chars and encode every byte
    // above 127 as a numeric character reference, so the XML parser only
    // sees plain ASCII.
    // NOTE(review): FILTER_FLAG_ENCODE_HIGH works byte by byte, so a UTF-8
    // response would have each multi-byte character split into several
    // references -- confirm the encoding the service actually sends.
    $response = filter_var(str_replace($ctrl_chars, "", $response),
        FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_HIGH);

    $xmlDoc = new DOMDocument();
    // loadXML() refuses an empty string (a ValueError on PHP 8) and returns
    // false for malformed XML; either way there is nothing to extract, so
    // treat it like any other failed request.
    if (!is_string($response) || $response === '' || !$xmlDoc->loadXML($response)) {
        file_put_contents(
            $error_log_filename,
            "Unable to parse service response as XML.\n",
            FILE_APPEND
        );
        continue;
    }

    // Errors reported by the service inside an otherwise valid response.
    $errors = $xmlDoc->getElementsByTagName("error");
    if ($errors->length > 0) {
        $messages = '';
        foreach ($errors as $error) {
            $messages .= $error->nodeValue . "\n";
        }
        file_put_contents($error_log_filename, $messages, FILE_APPEND);
        continue;
    }

    $summary = $xmlDoc->getElementsByTagName("summary");
    if ($summary->length == 0) {
        file_put_contents(
            $error_log_filename,
            "No summary node, assuming there's a missed error.\n",
            FILE_APPEND
        );
        continue;
    }
    $summaryNode = $summary->item(0);

    // Only replace the running estimate when the service actually sent one;
    // an empty value would make the loop condition fail and end the harvest
    // silently.
    if ($summaryNode->hasAttribute("totalMatched")) {
        $estimated_max_from_service = $summaryNode->getAttribute("totalMatched");
    }

    $records = $xmlDoc->getElementsByTagName("record");

    $flat_file = fopen($flat_filename, "a");
    if ($flat_file === false) {
        // Stop before the resume position is advanced past records that
        // were never written.
        file_put_contents(
            $error_log_filename,
            "Unable to open output file ".$flat_filename." for writing, failing.\n",
            FILE_APPEND
        );
        exit(1);
    }
    // One output row per record: the value of each supported concept, or an
    // empty field when the record has no element with that name.
    foreach ($records as $record) {
        $fields = array();
        foreach ($concept_names as $key) {
            $element = $record->getElementsByTagName($key);
            $fields[] = $element->length > 0 ? $element->item(0)->nodeValue : "";
        }
        fputcsv($flat_file, $fields, $seperator);
    }
    fclose($flat_file);

    $successive_failures = 0;

    // Position of the next page. A summary without a usable "next" value
    // gives no position to continue from (presumably the last page -- TODO
    // confirm against the TAPIR spec); in that case stop, and store the
    // position just past the records written so far so that a later run
    // does not start again from record 0 and append duplicates.
    $has_next = $summaryNode->hasAttribute("next")
        && $summaryNode->getAttribute("next") !== "";
    if ($has_next) {
        $start_record_temp = $summaryNode->getAttribute("next");
    } else {
        $start_record_temp = (int) $start_record + $records->length;
    }

    file_put_contents($record_num_filename, (string) $start_record_temp);
    $start_record = $start_record_temp;

    // If the number of records is less than request_limit, it means the
    // service is out of records.
    $finished = !$has_next || ($request_limit > $records->length);

    // Progress report: once after the first request, then every tenth one.
    if ($itrNum == 1) {
        print "Extimated number of records:  $estimated_max_from_service\n";
    } elseif ($itrNum % 10 == 0) {
        print "Pulled $start_record records out of $estimated_max_from_service ".
              "estimated total records.\n";
    }

}

?>