Project

General

Profile

« Previous | Next » 

Revision 1598

Moved _archive/tapir2flatClient/trunk/client/ to bin/tapir/

View differences:

_archive/tapir2flatClient/trunk/client/tapir2flat.php
1
<?php
2
#############################################################################
3
# Driver file for tapir2flatClient, a simple script to
4
# consume TAPIR web services and save the results in a delimited file,
5
# one record per line.  The current delimiter is comma ',' but
6
# that can be changed by changing the $seperator variable located
7
# in configurableParams.php.
8
#
9
# Command line usage:
10
#   php tapir2flat.php
11
#
12
# dependencies:
13
# php 5+
14
# pear
15
# HTTP_Request
16
# I had to install pear and HTTP_Request before it would work (on Ubuntu):
17
#   sudo apt-get install php-http-request
18
# This should install all dependencies, but just in case, the full
19
# dependency list is:
20
#   php-net-socket 
21
#   php-net-url
22
#   php-pear
23
#   php-http-request
24
#############################################################################
25

  
26
require_once('HTTP/Request.php'); // pear package
require_once('tapirRequestTemplate.php');
require_once('getAllConcepts.php');

// $url and $seperator are defined in configurableParams.php (pulled in by
// tapirRequestTemplate.php).
global $url;
global $seperator;

$flat_filename = "specieslink.txt";
$record_num_filename = "recordnum.dat";
$error_log_filename = "error.log";

// Reason phrases used when logging a non-200 response.
$http_status_labels = array(
  201 => 'Created',
  202 => 'Accepted',
  203 => 'Non-Authoritative Information',
  204 => 'No Content',
  205 => 'Reset Content',
  206 => 'Partial Content',
  300 => 'Multiple Choices',
  301 => 'Moved Permanently',
  302 => 'Found',
  303 => 'See Other',
  304 => 'Not Modified',
  305 => 'Use Proxy',
  307 => 'Temporary Redirect',
  400 => 'Bad Request',
  401 => 'Unauthorized',
  402 => 'Payment Required',
  403 => 'Forbidden',
  404 => 'Not Found',
  405 => 'Method Not Allowed',
  406 => 'Not Acceptable',
  407 => 'Proxy Authentication Required',
  408 => 'Request Timeout',
  409 => 'Conflict',
  410 => 'Gone',
  411 => 'Length Required',
  412 => 'Precondition Failed',
  413 => 'Request Entity Too Large',
  414 => 'Request-URI Too Long',
  415 => 'Unsupported Media Type',
  416 => 'Requested Range Not Satisfiable',
  417 => 'Expectation Failed',
  500 => 'Internal Server Error',
  501 => 'Not Implemented',
  502 => 'Bad Gateway',
  503 => 'Service Unavailable',
  504 => 'Gateway Timeout',
  505 => 'HTTP Version Not Supported',
);

/**
 * Appends each message (one per line) to the error log and counts the failure.
 *
 * Terminates the script after the third successive failure; otherwise
 * returns the updated failure count so the caller can retry the request.
 *
 * @param string $error_log_filename  Path of the log file (opened in append mode).
 * @param array  $messages            Messages to write, without trailing newline.
 * @param int    $successive_failures Failure count before this failure.
 * @return int   The incremented failure count.
 */
function tapir2flatLogFailure($error_log_filename, $messages, $successive_failures) {
  $error_log = fopen($error_log_filename,"a");
  foreach($messages as $message) {
    fwrite($error_log, $message . "\n");
  }

  $successive_failures += 1;
  if($successive_failures >= 3) {
    fwrite($error_log, "3 successive failures, quitting\n");
    fclose($error_log);
    exit;
  }

  fclose($error_log);
  return $successive_failures;
}

$supportedConcepts = getAllConcepts();
if(!is_array($supportedConcepts)){
  $error_log = fopen($error_log_filename,"a");
  fwrite($error_log, "Unable to retrieve supported concepts from service, failing.\n");
  fclose($error_log);
  exit;
}

// Column order of the flat file; every row is written in this order.
$conceptKeys = array_keys($supportedConcepts);

$successive_failures = 0;

$start_record = 0;	// Record to begin first request (ordinal position, not ID)
if(file_exists($record_num_filename)) {
  // Resume from the checkpoint left by a previous run.
  $start_record = (int) trim(file_get_contents($record_num_filename));
}

$request_limit = 1000;		// Number of records per request; cannot exceed 1000

//According to TAPIR docs, the estimate from the service
//is an upper bound on the number of records.  Start out high.
$estimated_max_from_service = 4000000;

if(!file_exists($flat_filename)) {
  // Write the header row with fputcsv so it is delimited and quoted exactly
  // like the data rows below.
  $flat_file = fopen($flat_filename,"a");
  fputcsv($flat_file,$conceptKeys,$seperator);
  fclose($flat_file);
}

$finished = false;
$itrNum = 0;

$http_request = new HTTP_Request();

while(!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record,$request_limit,$supportedConcepts);

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ( $code != 200 ) // 200 = OK
    {
        $label = (is_scalar($code) && isset($http_status_labels[(int) $code]))
               ? $http_status_labels[(int) $code]
               : 'Unknown Error';

        $successive_failures = tapir2flatLogFailure(
            $error_log_filename,
            array("Service responded with HTTP ".$code." code: ".$label),
            $successive_failures);

        //Should repeat the same request
        continue;
    }

    #Weird encoding bug - the response can contain raw ^F (0x06) and ^L (0x0C)
    #control characters, which make the xml parser choke.  Substitute them
    #before parsing.  The bytes are matched explicitly: an empty regex
    #pattern would match between every character of the response.
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);
    $xmlDoc = new DOMDocument();
    $xmlDoc->loadXML($response);

    $errors = $xmlDoc->getElementsByTagName("error");
    if($errors->length > 0) {
        $messages = array();
        foreach($errors as $error) {
            $messages[] = $error->nodeValue;
        }

        $successive_failures = tapir2flatLogFailure(
            $error_log_filename, $messages, $successive_failures);

        //Should repeat the same request
        continue;
    }

    $summary = $xmlDoc->getElementsByTagName("summary");
    if($summary->length == 0) {
        $successive_failures = tapir2flatLogFailure(
            $error_log_filename,
            array("No summary node, assuming there's a missed error."),
            $successive_failures);
        continue;
    }

    $start_record_temp = $summary->item(0)->getAttribute("next");
    $total_matched = $summary->item(0)->getAttribute("totalMatched");
    if(is_numeric($total_matched)) {
        // Keep the previous upper bound when the attribute is absent, so a
        // missing value cannot end the loop early.
        $estimated_max_from_service = $total_matched;
    }

    $flat_file = fopen($flat_filename,"a");

    $records = $xmlDoc->getElementsByTagName("record");
    foreach($records as $record) {
        $fields = array();
        foreach($conceptKeys as $key){
            $element = $record->getElementsByTagName($key);
            // Emit an empty field for a concept the record lacks so the
            // remaining values stay aligned with the header columns.
            $fields[] = ($element->length > 0) ? $element->item(0)->nodeValue : '';
        }
        fputcsv($flat_file,$fields,$seperator);
    }

    fclose($flat_file);

    $successive_failures = 0;

    // Checkpoint the position of the next request so a later run can resume.
    $record_file = fopen($record_num_filename,"w");
    fwrite($record_file,$start_record_temp);
    fclose($record_file);
    $start_record = $start_record_temp;

    //If number of records is less than request_limit, it means
    //the service is out of records.
    $finished = ($request_limit > $records->length);

    if($itrNum == 1) {
      print "Extimated number of records:  $estimated_max_from_service\n";
    } elseif($itrNum % 10 == 0) {
      print "Pulled $start_record records out of $estimated_max_from_service ".
            "estimated total records.\n";
    }

}
240

  
241
?>
242

  
_archive/tapir2flatClient/trunk/client/tapirRequestTemplate.php
1
<?php
2
#############################################################################
3
# Builds the xml body to be sent to Tapir service.  Only put in a 
4
# separate file because there is a lot of code-invariant text that would
5
# just make other parts of the script difficult to read.
6
#############################################################################
7

  
8
require_once('configurableParams.php');
9

  
10
# Builds the <xs:element> declarations for the output-model schema: one
# optional xs:string element per key of $supportedConcepts, each on its own
# line.  Returns '' when the array is empty.
function buildStructure($supportedConcepts) {
  $elementLines = array();
  foreach($supportedConcepts as $elementName => $conceptId){
    $elementLines[] =
      "              <xs:element name=\"$elementName\" type=\"xs:string\" minOccurs=\"0\"/>\n";
  }
  return implode('', $elementLines);
}
18

  
19
# Builds the <node> entries of the output-model mapping: for each entry of
# $supportedConcepts (element name => concept id) one node that maps the
# path /records/record/<name> to the concept id.  Returns '' when the array
# is empty.
function buildMap($supportedConcepts) {
  $mapStr = '';
  foreach($supportedConcepts as $key => $conceptId){
    // Both values end up inside XML attributes, so escape &, <, > and quotes;
    // a concept id such as a URL with a query string would otherwise produce
    // a malformed request document.
    $path = htmlspecialchars("/records/record/$key", ENT_QUOTES);
    $id = htmlspecialchars($conceptId, ENT_QUOTES);
    $mapStr .=
      "        <node path=\"$path\">\n".
      "          <concept id=\"$id\"/>\n".
      "        </node>\n";
  }
  return $mapStr;
}
29

  
30
# Assembles the complete TAPIR search request document.
#
#   $start              value of the search element's "start" attribute
#   $limit              value of the search element's "limit" attribute
#   $supportedConcepts  element name => concept id; drives the schema
#                       structure and the concept mapping
#
# The filter section comes from buildFilter().  Returns the XML as a string.
function buildRequest($start,$limit,$supportedConcepts) {

  // Request envelope, search element and the fixed start of the schema.
  $openingXml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n".
  "<request \n".
  "    xmlns=\"http://rs.tdwg.org/tapir/1.0\"\n".
  "    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n".
  "    xsi:schemaLocation=\"http://rs.tdwg.org/tapir/1.0 \n".
  "                        http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd\">\n".
  "  <header>\n".
  "  </header>\n".
  "  <search count=\"true\" start=\"$start\" limit=\"$limit\" envelope=\"true\">\n".
  "    <outputModel>\n".
  "      <structure>\n".
  "        <xs:schema targetNamespace=\"http://example.net/simple_specimen\" xmlns:xs=\"http://www.w3.org/2001/XMLSchema\" xsi:schemaLocation=\"http://www.w3.org/2001/XMLSchema http://www.w3.org/2001/XMLSchema.xsd\">\n".
  "          <xs:element name=\"records\">\n".
  "            <xs:complexType>\n".
  "              <xs:sequence>\n".
  "                <xs:element name=\"record\" minOccurs=\"0\" maxOccurs=\"unbounded\" type=\"unitType\">\n".
  "                </xs:element>\n".
  "              </xs:sequence>\n".
  "            </xs:complexType>\n".
  "          </xs:element>\n".
  "          <xs:complexType name=\"unitType\">\n".
  "            <xs:sequence>\n";

  // Closes the schema and opens the concept mapping.
  $structureToMappingXml =
  "            </xs:sequence>\n".
  "          </xs:complexType>\n".
  "        </xs:schema>\n".
  "      </structure>\n".
  "      <indexingElement path=\"/records/record\"/>\n".
  "      <mapping>\n";

  // Closes the output model and opens the filter.
  $mappingToFilterXml =
  "      </mapping>\n".
  "    </outputModel>\n".
  "    <filter>\n";

  $closingXml =
  "    </filter>\n".
  "  </search>\n".
  "</request>\n";

  return $openingXml .
         buildStructure($supportedConcepts) .
         $structureToMappingXml .
         buildMap($supportedConcepts) .
         $mappingToFilterXml .
         buildFilter() .
         $closingXml;
}
72

  
73
# Returns the fixed TAPIR request document that asks the service for its
# capabilities (an empty header followed by a <capabilities /> element).
function buildCapabilitiesRequest() {
  $requestLines = array(
    "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",
    "<request ",
    "    xmlns=\"http://rs.tdwg.org/tapir/1.0\"",
    "    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"",
    "    xsi:schemaLocation=\"http://rs.tdwg.org/tapir/1.0 ",
    "                        http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd\">",
    "  <header>",
    "  </header>",
    "  <capabilities />",
    "</request>",
  );

  // Every line, including the last one, is terminated by a newline.
  return implode("\n", $requestLines) . "\n";
}
87

  
88
?>
89

  
_archive/tapir2flatClient/trunk/client/configurableParams.php
1
<?php
2
#############################################################################
3
# File containing all the parameters that could potentially need to be
4
# configured for the job.
5
#
6
#  $url:  The url for the desired TAPIR service
7
#  $seperator:  The delimiter for the flat file.
8
#  buildFilter():  Function to specify the search parameters.
9
#  desiredConcepts:  Desired data to be returned by the service.
10
#
11
#############################################################################
12

  
13
# The url of the desired TAPIR service
14
$url = 'http://tapir.cria.org.br/tapirlink/tapir.php/specieslink';
15

  
16
# The separator to be used in the flat file to separate the
17
# data fields of each record.
18
$seperator = ',';
19

  
20
# Tapir services have an extremely flexible syntax to search for
21
# records with the desired properties.  However, this flexibility
22
# makes it rather difficult to generalize the search possibilities
23
# and quickly come up with appropriate xml.  As such, the current
24
# solution is to build the filter node here.  See the associated
25
# documentation on how to build the filter:
26
#   http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47
27
# Returns the body of the <filter> element sent with every search: an
# <equals> test that the dwcore Kingdom concept has the literal value
# "Plantae".  Edit this function to change the search criteria.
function buildFilter() {
  return implode('', array(
    '<equals>',
      '<concept id="http://rs.tdwg.org/dwc/dwcore/Kingdom"/>',
      '<literal value="Plantae"/>',
    '</equals>',
  ));
}
35

  
36
?>
_archive/tapir2flatClient/trunk/client/getAllConcepts.php
1
<?php
2
#############################################################################
3
# This file holds the utility function getAllConcepts which calls the 
4
# tapir service to find out all the concepts (data units) that are 
5
# supported.  This list is necessary so that we know what to ask for
6
# when making the request.
7
#############################################################################
8

  
9
require_once('HTTP/Request.php'); // pear package
10
require_once('tapirRequestTemplate.php'); 
11

  
12

  
13

  
14
/**
 * Asks the TAPIR service at the global $url for its capabilities and
 * collects every mappedConcept element of the response.
 *
 * @return array|int  On success an array mapping a sanitized key (the
 *                    concept id with '/' and '.' replaced by '_' and ':'
 *                    removed) to the original concept id.  On any failure a
 *                    message is appended to error.log and 0 is returned, so
 *                    callers should test the result with is_array().
 */
function getAllConcepts() {
    global $url;

    $error_log_filename = "error.log";

    // Reason phrases used when logging a non-200 response.
    $http_status_labels = array(
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        307 => 'Temporary Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Request Entity Too Large',
        414 => 'Request-URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Requested Range Not Satisfiable',
        417 => 'Expectation Failed',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
    );

    $http_request = new HTTP_Request();

    $body = buildCapabilitiesRequest();

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ( $code != 200 ) // 200 = OK
    {
        $label = (is_scalar($code) && isset($http_status_labels[(int) $code]))
               ? $http_status_labels[(int) $code]
               : 'Unknown Error';

        // The same text goes to the log file and to standard output.
        $message = "Service responded with HTTP ".$code." code: ".$label."\n".
                   "while attempting to request supported concepts\n";
        file_put_contents($error_log_filename, $message, FILE_APPEND);
        echo( $message );
        return 0;
    }

    #Weird encoding bug - the response can contain raw ^F (0x06) and ^L (0x0C)
    #control characters, which make the xml parser choke.  Substitute them
    #before parsing.  The bytes are matched explicitly: an empty regex
    #pattern would match between every character of the response.
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);
    $xmlDoc = new DOMDocument();
    $xmlDoc->loadXML($response);

    $errors = $xmlDoc->getElementsByTagName("error");
    if($errors->length > 0) {
        $error_text = '';
        foreach($errors as $error) {
            $error_text .= $error->nodeValue . "\n";
        }
        file_put_contents($error_log_filename, $error_text, FILE_APPEND);
        return 0;
    }

    $concepts = $xmlDoc->getElementsByTagName("mappedConcept");
    if($concepts->length == 0) {
        file_put_contents(
            $error_log_filename,
            "No mappedConcept node, assuming there's a missed error.\n",
            FILE_APPEND);
        return 0;
    }

    $concepts_array = array();
    foreach($concepts as $concept) {
        $concept_id = $concept->getAttribute("id");
        // Derive a key without '/', '.' or ':' from the concept id.
        $concept_key = str_replace(
            array('/', '.', ':'), array('_', '_', ''), $concept_id);
        $concepts_array[$concept_key] = $concept_id;
    }
    return $concepts_array;
}
127

  
128
?>
129

  
_archive/tapir2flatClient/trunk/client/README
1
tapir2flat.php:  a simple script to 
2
consume TAPIR web services and save the results in a delimited file,
3
one record per line.  The current delimiter is comma ',' but
4
that can be changed by changing the $seperator variable located
5
in configurableParams.php.
6

  
7
Command line usage:
8
  php tapir2flat.php
9

  
10
dependencies:
11
  php 5+
12
  pear
13
  HTTP_Request
14

  
15
I had to install pear and HTTP_Request before it would work (on Ubuntu):
16
  sudo apt-get install php-http-request
17

  
18
This should install all dependencies, but just in case, the full dependency list is:
19
  php-net-socket 
20
  php-net-url
21
  php-pear
22
  php-http-request
23

  
24
Necessary files:
25
  tapir2flat.php: Driver file
26
  configurableParams.php: File containing parameters that can be configured
27
  tapirRequestTemplate.php: Holds the template required to send a tapir request.
28
  getAllConcepts.php: holds utility function to get all possible return values from tapir service.
29

  
30
Possible output files:
31
  specieslink.txt:  Where the retrieved records will be stored.  Each line represents one record, with fields separated by the configured delimiter ($seperator in configurableParams.php).
32
  recordnum.dat:  Holds the record number at which the next request will start, so that an interrupted run can be resumed.
33
  error.log:  Where any errors will be recorded.
34

  
35
Possible configurations: see configurableParams.php as well.
36
  The url of desired tapir service
37
  Delimiter to be used to separate data fields within each record
38
  The filter sent to tapir service that specifies search parameters, in xml format.  See http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47 for documentation on how to do this.
39

  
bin/tapir/configurableParams.php
1
<?php
2
#############################################################################
3
# File containing all the parameters that could potentially need to be
4
# configured for the job.
5
#
6
#  $url:  The url for the desired TAPIR service
7
#  $seperator:  The delimiter for the flat file.
8
#  buildFilter():  Function to specify the search parameters.
9
#  desiredConcepts:  Desired data to be returned by the service.
10
#
11
#############################################################################
12

  
13
# The url of the desired TAPIR service
14
$url = 'http://tapir.cria.org.br/tapirlink/tapir.php/specieslink';
15

  
16
# The separator to be used in the flat file to separate the
17
# data fields of each record.
18
$seperator = ',';
19

  
20
# Tapir services have an extremely flexible syntax to search for
21
# records with the desired properties.  However, this flexibility
22
# makes it rather difficult to generalize the search possibilities
23
# and quickly come up with appropriate xml.  As such, the current
24
# solution is to build the filter node here.  See the associated
25
# documentation on how to build the filter:
26
#   http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47
27
# Returns the body of the <filter> element sent with every search: an
# <equals> test that the dwcore Kingdom concept has the literal value
# "Plantae".  Edit this function to change the search criteria.
function buildFilter() {
  return implode('', array(
    '<equals>',
      '<concept id="http://rs.tdwg.org/dwc/dwcore/Kingdom"/>',
      '<literal value="Plantae"/>',
    '</equals>',
  ));
}
35

  
36
?>
bin/tapir/tapir2flat.php
1
<?php
2
#############################################################################
3
# Driver file for tapir2flatClient, a simple script to
4
# consume TAPIR web services and save the results in a delimited file,
5
# one record per line.  The current delimiter is comma ',' but
6
# that can be changed by changing the $seperator variable located
7
# in configurableParams.php.
8
#
9
# Command line usage:
10
#   php tapir2flat.php
11
#
12
# dependencies:
13
# php 5+
14
# pear
15
# HTTP_Request
16
# I had to install pear and HTTP_Request before it would work (on Ubuntu):
17
#   sudo apt-get install php-http-request
18
# This should install all dependencies, but just in case, the full
19
# dependency list is:
20
#   php-net-socket 
21
#   php-net-url
22
#   php-pear
23
#   php-http-request
24
#############################################################################
25

  
26
require_once('HTTP/Request.php'); // pear package
require_once('tapirRequestTemplate.php');
require_once('getAllConcepts.php');

// $url and $seperator are defined in configurableParams.php (pulled in by
// tapirRequestTemplate.php).
global $url;
global $seperator;

$flat_filename = "specieslink.txt";
$record_num_filename = "recordnum.dat";
$error_log_filename = "error.log";

// Reason phrases used when logging a non-200 response.
$http_status_labels = array(
  201 => 'Created',
  202 => 'Accepted',
  203 => 'Non-Authoritative Information',
  204 => 'No Content',
  205 => 'Reset Content',
  206 => 'Partial Content',
  300 => 'Multiple Choices',
  301 => 'Moved Permanently',
  302 => 'Found',
  303 => 'See Other',
  304 => 'Not Modified',
  305 => 'Use Proxy',
  307 => 'Temporary Redirect',
  400 => 'Bad Request',
  401 => 'Unauthorized',
  402 => 'Payment Required',
  403 => 'Forbidden',
  404 => 'Not Found',
  405 => 'Method Not Allowed',
  406 => 'Not Acceptable',
  407 => 'Proxy Authentication Required',
  408 => 'Request Timeout',
  409 => 'Conflict',
  410 => 'Gone',
  411 => 'Length Required',
  412 => 'Precondition Failed',
  413 => 'Request Entity Too Large',
  414 => 'Request-URI Too Long',
  415 => 'Unsupported Media Type',
  416 => 'Requested Range Not Satisfiable',
  417 => 'Expectation Failed',
  500 => 'Internal Server Error',
  501 => 'Not Implemented',
  502 => 'Bad Gateway',
  503 => 'Service Unavailable',
  504 => 'Gateway Timeout',
  505 => 'HTTP Version Not Supported',
);

/**
 * Appends each message (one per line) to the error log and counts the failure.
 *
 * Terminates the script after the third successive failure; otherwise
 * returns the updated failure count so the caller can retry the request.
 *
 * @param string $error_log_filename  Path of the log file (opened in append mode).
 * @param array  $messages            Messages to write, without trailing newline.
 * @param int    $successive_failures Failure count before this failure.
 * @return int   The incremented failure count.
 */
function tapir2flatLogFailure($error_log_filename, $messages, $successive_failures) {
  $error_log = fopen($error_log_filename,"a");
  foreach($messages as $message) {
    fwrite($error_log, $message . "\n");
  }

  $successive_failures += 1;
  if($successive_failures >= 3) {
    fwrite($error_log, "3 successive failures, quitting\n");
    fclose($error_log);
    exit;
  }

  fclose($error_log);
  return $successive_failures;
}

$supportedConcepts = getAllConcepts();
if(!is_array($supportedConcepts)){
  $error_log = fopen($error_log_filename,"a");
  fwrite($error_log, "Unable to retrieve supported concepts from service, failing.\n");
  fclose($error_log);
  exit;
}

// Column order of the flat file; every row is written in this order.
$conceptKeys = array_keys($supportedConcepts);

$successive_failures = 0;

$start_record = 0;	// Record to begin first request (ordinal position, not ID)
if(file_exists($record_num_filename)) {
  // Resume from the checkpoint left by a previous run.
  $start_record = (int) trim(file_get_contents($record_num_filename));
}

$request_limit = 1000;		// Number of records per request; cannot exceed 1000

//According to TAPIR docs, the estimate from the service
//is an upper bound on the number of records.  Start out high.
$estimated_max_from_service = 4000000;

if(!file_exists($flat_filename)) {
  // Write the header row with fputcsv so it is delimited and quoted exactly
  // like the data rows below.
  $flat_file = fopen($flat_filename,"a");
  fputcsv($flat_file,$conceptKeys,$seperator);
  fclose($flat_file);
}

$finished = false;
$itrNum = 0;

$http_request = new HTTP_Request();

while(!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record,$request_limit,$supportedConcepts);

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ( $code != 200 ) // 200 = OK
    {
        $label = (is_scalar($code) && isset($http_status_labels[(int) $code]))
               ? $http_status_labels[(int) $code]
               : 'Unknown Error';

        $successive_failures = tapir2flatLogFailure(
            $error_log_filename,
            array("Service responded with HTTP ".$code." code: ".$label),
            $successive_failures);

        //Should repeat the same request
        continue;
    }

    #Weird encoding bug - the response can contain raw ^F (0x06) and ^L (0x0C)
    #control characters, which make the xml parser choke.  Substitute them
    #before parsing.  The bytes are matched explicitly: an empty regex
    #pattern would match between every character of the response.
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);
    $xmlDoc = new DOMDocument();
    $xmlDoc->loadXML($response);

    $errors = $xmlDoc->getElementsByTagName("error");
    if($errors->length > 0) {
        $messages = array();
        foreach($errors as $error) {
            $messages[] = $error->nodeValue;
        }

        $successive_failures = tapir2flatLogFailure(
            $error_log_filename, $messages, $successive_failures);

        //Should repeat the same request
        continue;
    }

    $summary = $xmlDoc->getElementsByTagName("summary");
    if($summary->length == 0) {
        $successive_failures = tapir2flatLogFailure(
            $error_log_filename,
            array("No summary node, assuming there's a missed error."),
            $successive_failures);
        continue;
    }

    $start_record_temp = $summary->item(0)->getAttribute("next");
    $total_matched = $summary->item(0)->getAttribute("totalMatched");
    if(is_numeric($total_matched)) {
        // Keep the previous upper bound when the attribute is absent, so a
        // missing value cannot end the loop early.
        $estimated_max_from_service = $total_matched;
    }

    $flat_file = fopen($flat_filename,"a");

    $records = $xmlDoc->getElementsByTagName("record");
    foreach($records as $record) {
        $fields = array();
        foreach($conceptKeys as $key){
            $element = $record->getElementsByTagName($key);
            // Emit an empty field for a concept the record lacks so the
            // remaining values stay aligned with the header columns.
            $fields[] = ($element->length > 0) ? $element->item(0)->nodeValue : '';
        }
        fputcsv($flat_file,$fields,$seperator);
    }

    fclose($flat_file);

    $successive_failures = 0;

    // Checkpoint the position of the next request so a later run can resume.
    $record_file = fopen($record_num_filename,"w");
    fwrite($record_file,$start_record_temp);
    fclose($record_file);
    $start_record = $start_record_temp;

    //If number of records is less than request_limit, it means
    //the service is out of records.
    $finished = ($request_limit > $records->length);

    if($itrNum == 1) {
      print "Extimated number of records:  $estimated_max_from_service\n";
    } elseif($itrNum % 10 == 0) {
      print "Pulled $start_record records out of $estimated_max_from_service ".
            "estimated total records.\n";
    }

}
240

  
241
?>
242

  
bin/tapir/tapirRequestTemplate.php
1
<?php
2
#############################################################################
3
# Builds the xml body to be sent to Tapir service.  Only put in a 
4
# separate file because there is a lot of code-invariant text that would
5
# just make other parts of the script difficult to read.
6
#############################################################################
7

  
8
require_once('configurableParams.php');
9

  
10
/**
 * Build the <xs:element> declarations for the request's output structure.
 *
 * One declaration line is emitted per key of $supportedConcepts; the array
 * values are not used here.
 *
 * @param array $supportedConcepts Map whose keys become element names.
 * @return string Newline-terminated declaration lines, or '' when the
 *                argument is empty or not an array.
 */
function buildStructure($supportedConcepts) {
  // Guard against a non-array argument, which array_keys() would reject.
  if(!is_array($supportedConcepts)){
    return '';
  }

  $structureStr = '';
  foreach(array_keys($supportedConcepts) as $key){
    // Escape the key so characters such as & < " cannot break the
    // attribute and leave the request document malformed.
    $name = htmlspecialchars((string)$key, ENT_QUOTES, 'UTF-8');
    $structureStr .=
      "              <xs:element name=\"$name\" type=\"xs:string\" minOccurs=\"0\"/>\n";
  }
  return $structureStr;
}
18

  
19
/**
 * Build the <node> mapping entries that bind each output element to a concept.
 *
 * For every key => concept id pair, emits a <node> whose path is
 * /records/record/<key> and whose <concept> child carries the concept id.
 *
 * @param array $supportedConcepts Map of element name => concept id.
 * @return string Newline-terminated mapping entries, or '' when the
 *                argument is empty or not an array.
 */
function buildMap($supportedConcepts) {
  // Guard against a non-array argument, which cannot be iterated.
  if(!is_array($supportedConcepts)){
    return '';
  }

  $mapStr = '';
  foreach($supportedConcepts as $key => $conceptId){
    // Both values land inside XML attributes; escape them so an id
    // containing & < or " does not produce a malformed request.
    $path = htmlspecialchars((string)$key, ENT_QUOTES, 'UTF-8');
    $id = htmlspecialchars((string)$conceptId, ENT_QUOTES, 'UTF-8');
    $mapStr .=
      "        <node path=\"/records/record/$path\">\n".
      "          <concept id=\"$id\"/>\n".
      "        </node>\n";
  }
  return $mapStr;
}
29

  
30
/**
 * Assemble the complete TAPIR search request document.
 *
 * The fixed parts of the document are kept as lists of lines; the generated
 * parts (structure declarations, concept mapping, filter) are spliced in
 * between them.
 *
 * @param mixed $start Value placed in the search element's start attribute.
 * @param mixed $limit Value placed in the search element's limit attribute.
 * @param array $supportedConcepts Passed to buildStructure() and buildMap().
 * @return string The request XML.
 */
function buildRequest($start,$limit,$supportedConcepts) {
  // Generated fragments, produced in the order they appear in the document.
  $structureXml = buildStructure($supportedConcepts);
  $mappingXml = buildMap($supportedConcepts);
  $filterXml = buildFilter();

  // Everything up to the point where the element declarations are inserted.
  $opening = array(
    '<?xml version="1.0" encoding="UTF-8" ?>',
    '<request ',
    '    xmlns="http://rs.tdwg.org/tapir/1.0"',
    '    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    '    xsi:schemaLocation="http://rs.tdwg.org/tapir/1.0 ',
    '                        http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd">',
    '  <header>',
    '  </header>',
    '  <search count="true" start="' . $start . '" limit="' . $limit . '" envelope="true">',
    '    <outputModel>',
    '      <structure>',
    '        <xs:schema targetNamespace="http://example.net/simple_specimen" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema http://www.w3.org/2001/XMLSchema.xsd">',
    '          <xs:element name="records">',
    '            <xs:complexType>',
    '              <xs:sequence>',
    '                <xs:element name="record" minOccurs="0" maxOccurs="unbounded" type="unitType">',
    '                </xs:element>',
    '              </xs:sequence>',
    '            </xs:complexType>',
    '          </xs:element>',
    '          <xs:complexType name="unitType">',
    '            <xs:sequence>',
  );

  // Closes the structure section and opens the mapping section.
  $structureToMapping = array(
    '            </xs:sequence>',
    '          </xs:complexType>',
    '        </xs:schema>',
    '      </structure>',
    '      <indexingElement path="/records/record"/>',
    '      <mapping>',
  );

  // Closes the output model and opens the filter section.
  $mappingToFilter = array(
    '      </mapping>',
    '    </outputModel>',
    '    <filter>',
  );

  $closing = array(
    '    </filter>',
    '  </search>',
    '</request>',
  );

  return implode("\n", $opening) . "\n" .
         $structureXml .
         implode("\n", $structureToMapping) . "\n" .
         $mappingXml .
         implode("\n", $mappingToFilter) . "\n" .
         $filterXml .
         implode("\n", $closing) . "\n";
}
72

  
73
/**
 * Build the fixed TAPIR request document that asks for service capabilities.
 *
 * @return string The request XML; every line, including the last, ends
 *                with a newline.
 */
function buildCapabilitiesRequest() {
  $lines = array(
    '<?xml version="1.0" encoding="UTF-8" ?>',
    '<request ',
    '    xmlns="http://rs.tdwg.org/tapir/1.0"',
    '    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    '    xsi:schemaLocation="http://rs.tdwg.org/tapir/1.0 ',
    '                        http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd">',
    '  <header>',
    '  </header>',
    '  <capabilities />',
    '</request>',
  );

  return implode("\n", $lines) . "\n";
}
87

  
88
?>
89

  
bin/tapir/README
1
tapir2flat.php:  a simple script to 
2
consume TAPIR web services and save the results in a delimited file,
3
one record per line.  The current delimiter is backtick '`' but
4
that can be changed by changing the $seperator variable located
5
in configurableParams.php.
6

  
7
Command line usage:
8
  php tapir2flat.php
9

  
10
dependencies:
11
  php 5+
12
  pear
13
  HTTP_Request
14

  
15
I had to install pear and HTTP_Request before it would work (on Ubuntu):
16
  sudo apt-get install php-http-request
17

  
18
This should install all dependencies, but just in case, the full dependency list is:
19
  php-net-socket 
20
  php-net-url
21
  php-pear
22
  php-http-request
23

  
24
Necessary files:
25
  tapir2flat.php: Driver file
26
  configurableParams.php: File containing parameters that can be configured
27
  tapirRequestTemplate.php: Holds the template required to send a tapir request.
28
  getAllConcepts.php: holds utility function to get all possible return values from tapir service.
29

  
30
Possible output files:
31
  specieslink.txt:  Where the retrieved records will be stored.  Each line represents one record, delimited by whatever is configured as the delimiter parameter.
32
  recordnum.dat:  Holds the starting record number for the most recently sent request.
33
  error.log:  Where any errors will be recorded.
34

  
35
Possible configurations: see configurableParams.php as well.
36
  The url of desired tapir service
37
  Delimiter to be used to separate data fields within each record
38
  The filter sent to tapir service that specifies search parameters, in xml format.  See http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47 for documentation on how to do this.
39

  
bin/tapir/getAllConcepts.php
1
<?php
2
#############################################################################
3
# This file holds the utility function getAllConcepts which calls the 
4
# tapir service to find out all the concepts (data units) that are 
5
# supported.  This list is necessary so that we know what to ask for
6
# when making the request.
7
#############################################################################
8

  
9
require_once('HTTP/Request.php'); // pear package
10
require_once('tapirRequestTemplate.php'); 
11

  
12

  
13

  
14
/**
 * Ask the TAPIR service which concepts it supports.
 *
 * Posts the capabilities request to the global $url, parses the XML reply and
 * collects the "id" attribute of every <mappedConcept> element.
 *
 * Each failure path appends a message to error.log and returns 0; a non-200
 * response is additionally echoed.
 *
 * @return array|int Map of sanitized key => concept id on success, where the
 *                   key is the id with "/" and "." replaced by "_" and ":"
 *                   removed; 0 on any failure.
 */
function getAllConcepts() {
    global $url;

    $error_log_filename = "error.log";

    // Labels for the non-200 status codes this script can name; anything
    // else is reported as 'Unknown Error'.
    $status_labels = array(
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        307 => 'Temporary Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Request Entity Too Large',
        414 => 'Request-URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Requested Range Not Satisfiable',
        417 => 'Expectation Failed',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
    );

    $http_request = new HTTP_Request();

    $body = buildCapabilitiesRequest();

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    // NOTE(review): these assign underscore-prefixed members of HTTP_Request
    // directly; confirm the library honours plain integers here.
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ( $code != 200 ) // 200 = OK
    {
        $label = isset($status_labels[$code]) ? $status_labels[$code] : 'Unknown Error';

        // Same text goes to the log and to the console.
        $message = "Service responded with HTTP ".$code." code: ".$label."\n".
                   "while attempting to request supported concepts\n";
        file_put_contents($error_log_filename, $message, FILE_APPEND);
        echo( $message );
        return 0;
    }

    // Nothing to parse: report it instead of handing an empty document to
    // the XML parser.
    if (!is_string($response) || $response === '') {
        file_put_contents($error_log_filename,
            "Service returned an empty response while attempting to request supported concepts\n",
            FILE_APPEND);
        return 0;
    }

    #Weird encoding bug - the ^F (0x06) and ^L (0x0C) control characters
    #make the xml parser choke, so they are replaced before parsing.
    #The bytes are written as escapes so they stay visible and survive
    #editors that strip control characters.
    #NOTE(review): the substitutes "6" and "12" are kept from the original
    #code although its comment spoke of removing the characters - confirm.
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);

    $xmlDoc = new DOMDocument();
    if (!$xmlDoc->loadXML($response)) {
        file_put_contents($error_log_filename,
            "Unable to parse the capabilities response as XML.\n",
            FILE_APPEND);
        return 0;
    }

    // Any <error> element in the reply is treated as a failed request.
    $errors = $xmlDoc->getElementsByTagName("error");
    if($errors->length > 0) {
        $error_text = '';
        foreach($errors as $error) {
            $error_text .= $error->nodeValue . "\n";
        }
        file_put_contents($error_log_filename, $error_text, FILE_APPEND);
        return 0;
    }

    $concepts = $xmlDoc->getElementsByTagName("mappedConcept");
    if($concepts->length == 0) {
        file_put_contents($error_log_filename,
            "No mappedConcept node, assuming there's a missed error.\n",
            FILE_APPEND);
        return 0;
    }

    $concepts_array = array();
    foreach($concepts as $concept) {
        $concept_id = $concept->getAttribute("id");
        // Derive the key: "/" and "." become "_", ":" is dropped.
        $concept_key = str_replace(array('/', '.', ':'), array('_', '_', ''), $concept_id);
        $concepts_array[$concept_key] = $concept_id;
    }
    return $concepts_array;
}
127

  
128
?>
129

  

Also available in: Unified diff