Project

General

Profile

1 1 wheeler
<?php
2
#############################################################################
3
# Driver file for taiper2flatClient, a simple script to
4
# consume TAPIR web services and save the results in a delimited file,
5
# one record per line.  The current delimiter is backtick '`' but
6
# that can be changed by changing the $seperator variable located
7
# in configurableParams.php.
8
#
9
# Command line usage:
10
#   php taiper2flat.php
11
#
12
# dependencies:
13
# php 5+
14
# pear
15
# HTTP_Request
16
# I had to install pear and HTTP_Request before it would work (on Ubuntu):
17
#   sudo apt-get install php-http-request
18
# This should install all dependencies, but just in case, the full
19
# dependency list is:
20
#   php-net-socket
21
#   php-net-url
22
#   php-pear
23
#   php-http-request
24
#############################################################################
25
26
require_once('HTTP/Request.php'); // pear package
27
require_once('tapirRequestTemplate.php');
28
require_once('getAllConcepts.php');
29
30
// Service endpoint and field delimiter. Neither is assigned in this file;
// presumably both come from the included configuration (the header comment
// places $seperator in configurableParams.php) -- TODO confirm for $url.
global $url, $seperator;

// Files used by this run, all relative to the current working directory:
// the delimited output, the resume checkpoint and the error log.
$flat_filename       = 'specieslink.txt';
$record_num_filename = 'recordnum.dat';
$error_log_filename  = 'error.log';

// The list of supported concepts drives both the header row and the columns
// of every record, so there is nothing useful to do without it.
$supportedConcepts = getAllConcepts();
if (is_array($supportedConcepts) === false) {
    file_put_contents(
        $error_log_filename,
        "Unable to retrieve supported concepts from service, failing.\n",
        FILE_APPEND
    );
    exit;
}
44
45
// Number of failed requests in a row; the main loop quits after three.
$successive_failures = 0;

// Record to begin first request (ordinal position, not ID).
// A checkpoint written by an earlier run lets the harvest resume where it
// stopped instead of starting over.
$start_record = 0;
if (file_exists($record_num_filename)) {
    $checkpoint = trim((string) file_get_contents($record_num_filename));

    // Only accept a plain non-negative integer. An empty, unreadable or
    // corrupt checkpoint falls back to 0 rather than being passed on to the
    // request builder as a non-numeric string.
    if (preg_match('/^[0-9]+$/', $checkpoint)) {
        $start_record = (int) $checkpoint;
    }
}

$request_limit = 1000; // Number of records per request; cannot exceed 1000

// According to TAPIR docs, the estimate from the service
// is an upper bound on the number of records.  Start out high.
$estimated_max_from_service = 4000000;
57
58
// A brand-new output file starts with a header row naming every supported
// concept, in the same order the record columns are written.
if (!file_exists($flat_filename)) {
    // implode() places the separator only between names, so the header stays
    // correct for a separator of any length (the previous code stripped a
    // fixed single trailing character).
    $file_header = implode($seperator, array_keys($supportedConcepts)) . "\n";
    file_put_contents($flat_filename, $file_header, FILE_APPEND);
}
69
70
/**
 * Map an HTTP status code to its standard reason phrase.
 *
 * @param mixed $code Status code as returned by HTTP_Request::getResponseCode().
 * @return string Reason phrase, or 'Unknown Error' for any code not listed.
 */
function taiper2flat_status_label($code)
{
    $labels = array(
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        307 => 'Temporary Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Request Entity Too Large',
        414 => 'Request-URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Requested Range Not Satisfiable',
        417 => 'Expectation Failed',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
    );

    return isset($labels[$code]) ? $labels[$code] : 'Unknown Error';
}

/**
 * Append failure messages to the error log and end the script once three
 * requests in a row have failed.
 *
 * @param string $log_filename  Error log to append to.
 * @param array  $messages      One log line per entry (newline is added here).
 * @param int    $failure_count Consecutive failures so far, including this one.
 * @return void Does not return when $failure_count has reached 3.
 */
function taiper2flat_log_failure($log_filename, $messages, $failure_count)
{
    $give_up = ($failure_count >= 3);

    $text = '';
    foreach ($messages as $message) {
        $text .= $message . "\n";
    }
    if ($give_up) {
        $text .= "3 successive failures, quitting\n";
    }
    file_put_contents($log_filename, $text, FILE_APPEND);

    if ($give_up) {
        exit;
    }
}

/**
 * Flatten one record element into a single delimited line.
 *
 * @param DOMElement $record        The record element to flatten.
 * @param array      $concept_names Element names to extract, in column order.
 * @param string     $separator     Field delimiter.
 * @return string The line without a trailing newline; a concept that is
 *                missing from the record yields an empty field.
 */
function taiper2flat_record_line($record, $concept_names, $separator)
{
    // Characters that would corrupt the flat file: the delimiter itself,
    // commas and double quotes (as before), plus line breaks, which would
    // otherwise split one record across several lines.
    $unsafe = array($separator, ',', '"', "\r", "\n");

    $fields = array();
    foreach ($concept_names as $name) {
        $value = '';
        $matches = $record->getElementsByTagName($name);
        if ($matches->length > 0) {
            // Literal replacement: the separator is never treated as a
            // regular expression, so any delimiter is safe.
            $value = str_replace($unsafe, ' ', $matches->item(0)->nodeValue);
        }
        $fields[] = $value;
    }

    // implode() adds the separator only between fields, whatever its length.
    return implode($separator, $fields);
}

$finished = false;
$itrNum = 0;

// Guard against a non-numeric checkpoint value reaching the comparisons below.
$start_record = (int) $start_record;
$concept_names = array_keys($supportedConcepts);

// Timeouts are passed as constructor options instead of being written to
// private properties. HTTP_Request expects readTimeout as
// array(seconds, microseconds), not a bare integer.
$http_request = new HTTP_Request('', array(
    'timeout'     => 300,
    'readTimeout' => array(300, 0),
));

// Page through the service until it runs out of records. Any failure leaves
// $start_record untouched, so the next pass repeats the same request.
while (!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record, $request_limit, $supportedConcepts);

    $http_request->setMethod('POST');
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData($body);
    $http_request->setURL($url);

    // To inspect the entire raw request while debugging:
    //   echo "\n\n" . $http_request->_buildRequest();

    // A transport-level failure (connect error, timeout) comes back as a
    // PEAR_Error; log its message rather than a meaningless status line.
    $send_result = $http_request->sendRequest();
    if ($send_result instanceof PEAR_Error) {
        $successive_failures += 1;
        taiper2flat_log_failure(
            $error_log_filename,
            array("Request failed: " . $send_result->getMessage()),
            $successive_failures
        );
        continue;
    }

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ($code != 200) { // 200 = OK
        $successive_failures += 1;
        taiper2flat_log_failure(
            $error_log_filename,
            array("Service responded with HTTP " . $code . " code: " . taiper2flat_status_label($code)),
            $successive_failures
        );
        continue;
    }

    // Weird encoding bug: the response can contain raw ^F (0x06) and ^L (0x0C)
    // control characters that make the XML parser choke. They are swapped for
    // the same text the original substitutions used ("6" and "12"). The bytes
    // are written as explicit escapes so that an editor or file transfer
    // cannot silently strip them from the source.
    // NOTE(review): the original comment says "remove"; confirm whether an
    // empty replacement was intended.
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);

    $xmlDoc = new DOMDocument();
    $xmlDoc->loadXML($response);

    // Errors reported by the service inside an otherwise successful response.
    $errors = $xmlDoc->getElementsByTagName("error");
    if ($errors->length > 0) {
        $messages = array();
        foreach ($errors as $error) {
            $messages[] = $error->nodeValue;
        }
        $successive_failures += 1;
        taiper2flat_log_failure($error_log_filename, $messages, $successive_failures);
        continue;
    }

    $summary = $xmlDoc->getElementsByTagName("summary");
    if ($summary->length == 0) {
        $successive_failures += 1;
        taiper2flat_log_failure(
            $error_log_filename,
            array("No summary node, assuming there's a missed error."),
            $successive_failures
        );
        continue;
    }

    $summary_node = $summary->item(0);
    $next_attr = trim($summary_node->getAttribute("next"));
    $total_attr = trim($summary_node->getAttribute("totalMatched"));

    // getAttribute() returns '' for a missing attribute. Keep the previous
    // estimate in that case instead of overwriting it with an empty string,
    // which would end the loop early.
    if (is_numeric($total_attr)) {
        $estimated_max_from_service = (int) $total_attr;
    }

    $records = $xmlDoc->getElementsByTagName("record");
    $recordsStr = '';
    foreach ($records as $record) {
        $recordsStr .= taiper2flat_record_line($record, $concept_names, $seperator) . "\n";
    }
    file_put_contents($flat_filename, $recordsStr, FILE_APPEND);

    $successive_failures = 0;

    // Work out where the next request starts. Without a usable "next" value
    // (presumably the service omits it on the last page -- TODO confirm
    // against the TAPIR spec) advance by the number of records just written,
    // so the checkpoint never becomes empty and a later run does not start
    // again from record 0.
    $has_next = is_numeric($next_attr);
    if ($has_next) {
        $next_record = (int) $next_attr;
    } else {
        $next_record = $start_record + $records->length;
    }

    // Persist the checkpoint only after the records are safely in the file.
    file_put_contents($record_num_filename, (string) $next_record);
    $start_record = $next_record;

    // Fewer records than requested means the service is out of records;
    // so does a response that names no next record.
    $finished = !$has_next || ($request_limit > $records->length);

    if ($itrNum == 1) {
        print "Extimated number of records:  $estimated_max_from_service\n";
    } elseif ($itrNum % 10 == 0) {
        print "Pulled $start_record records out of $estimated_max_from_service ".
              "estimated total records.\n";
    }
}
244
245
?>