«
Previous
|
Next
»
Revision 1
Added by Matt Wheeler over 14 years ago
tapir2flatClient/trunk/client/configurableParams.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# File containing all the parameters that could potentially need to be |
|
4 |
# configured for the job. |
|
5 |
# |
|
6 |
# $url: The url for the desired TAPIR service |
|
7 |
# $seperator: The delimiter for the flat file. |
|
8 |
# buildFilter(): Function to specify the search parameters. |
|
9 |
# desiredConcepts: Desired data to be returned by the service. |
|
10 |
# |
|
11 |
############################################################################# |
|
12 |
|
|
13 |
# The URL of the TAPIR service endpoint that requests are sent to. |

14 |
$url = 'http://tapir.cria.org.br/tapirlink/tapir.php/specieslink'; |

15 |


16 |
# The separator written into the flat file between the |

17 |
# data fields of each record (currently a single backtick). |

18 |
$seperator = '`'; |
|
19 |
|
|
20 |
# Tapir services have an extremely flexible syntax to search for |
|
21 |
# records with the desired properties. However, this flexibility |
|
22 |
# makes it rather difficult to generalize the search possibilities |
|
23 |
# and quickly come up with appropriate xml. As such, the current |
|
24 |
# solution is to build the filter node here. See the associated |
|
25 |
# documentation on how to build the filter: |
|
26 |
# http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47 |
|
27 |
# Returns the XML fragment placed inside the <filter> element of a search
# request. The fragment is a single <equals> test that compares the dwcore
# Kingdom concept against the literal "Plantae". Edit the pieces below to
# change which records the service is asked for.
function buildFilter() {
    $fragments = array(
        '<equals>',
        '<concept id="http://rs.tdwg.org/dwc/dwcore/Kingdom"/>',
        '<literal value="Plantae"/>',
        '</equals>',
    );

    # The pieces are joined with nothing in between: no whitespace or
    # newlines are part of the filter text.
    return implode('', $fragments);
}
|
35 |
|
|
36 |
# Tapir services store their data as concepts. This array (hash) maps each column |

37 |
# name used in the flat file to the concept that fills that column. For instance: |
|
38 |
# 'dwcoreGlobalUniqueIdentifier' => 'http://rs.tdwg.org/dwc/dwcore/GlobalUniqueIdentifier' |
|
39 |
# is interpreted as find the GlobalUniqueIdentifier for each record and store it in |
|
40 |
# the column denoted as dwcoreGlobalUniqueIdentifier in the delimited file. If the |
|
41 |
# concept does not exist for a given record, an empty string will be put in its place. |
|
42 |
# Map of flat-file column name => concept URI.
#
# Each key is the column name used in the delimited file and each value is
# the identifier of the concept whose value is stored in that column.
# Keys are grouped by the schema their concept comes from (dwcore,
# curatorial, geospatial, darwin 1.0).
#
# Fixed here: dwcoreDateLastModified and dwcoreSpecificEpithet previously
# pointed at the GlobalUniqueIdentifier and InfraspecificEpithet concepts
# (copy-paste slips), which would have duplicated those columns instead of
# returning the intended data.
$desiredConcepts = array(
    'dwcoreGlobalUniqueIdentifier' => 'http://rs.tdwg.org/dwc/dwcore/GlobalUniqueIdentifier',
    'dwcoreDateLastModified' => 'http://rs.tdwg.org/dwc/dwcore/DateLastModified',
    'dwcoreBasisOfRecord' => 'http://rs.tdwg.org/dwc/dwcore/BasisOfRecord',
    'dwcoreInstitutionCode' => 'http://rs.tdwg.org/dwc/dwcore/InstitutionCode',
    'dwcoreCollectionCode' => 'http://rs.tdwg.org/dwc/dwcore/CollectionCode',
    'dwcoreCatalogNumber' => 'http://rs.tdwg.org/dwc/dwcore/CatalogNumber',
    'dwcoreRemarks' => 'http://rs.tdwg.org/dwc/dwcore/Remarks',
    'dwcoreScientificName' => 'http://rs.tdwg.org/dwc/dwcore/ScientificName',
    'dwcoreKingdom' => 'http://rs.tdwg.org/dwc/dwcore/Kingdom',
    'dwcorePhylum' => 'http://rs.tdwg.org/dwc/dwcore/Phylum',
    'dwcoreClass' => 'http://rs.tdwg.org/dwc/dwcore/Class',
    'dwcoreOrder' => 'http://rs.tdwg.org/dwc/dwcore/Order',
    'dwcoreFamily' => 'http://rs.tdwg.org/dwc/dwcore/Family',
    'dwcoreGenus' => 'http://rs.tdwg.org/dwc/dwcore/Genus',
    'dwcoreSpecificEpithet' => 'http://rs.tdwg.org/dwc/dwcore/SpecificEpithet',
    'dwcoreInfraspecificEpithet' => 'http://rs.tdwg.org/dwc/dwcore/InfraspecificEpithet',
    'dwcoreAuthorYearOfScientificName' => 'http://rs.tdwg.org/dwc/dwcore/AuthorYearOfScientificName',
    'dwcoreContinent' => 'http://rs.tdwg.org/dwc/dwcore/Continent',
    'dwcoreCountry' => 'http://rs.tdwg.org/dwc/dwcore/Country',
    'dwcoreStateProvince' => 'http://rs.tdwg.org/dwc/dwcore/StateProvince',
    'dwcoreCounty' => 'http://rs.tdwg.org/dwc/dwcore/County',
    'dwcoreLocality' => 'http://rs.tdwg.org/dwc/dwcore/Locality',
    'dwcoreMinimumElevationInMeters' => 'http://rs.tdwg.org/dwc/dwcore/MinimumElevationInMeters',
    'dwcoreMaximumElevationInMeters' => 'http://rs.tdwg.org/dwc/dwcore/MaximumElevationInMeters',
    'dwcoreMinimumDepthInMeters' => 'http://rs.tdwg.org/dwc/dwcore/MinimumDepthInMeters',
    'dwcoreMaximumDepthInMeters' => 'http://rs.tdwg.org/dwc/dwcore/MaximumDepthInMeters',
    'dwcoreDayOfYear' => 'http://rs.tdwg.org/dwc/dwcore/DayOfYear',
    'dwcoreCollector' => 'http://rs.tdwg.org/dwc/dwcore/Collector',
    'dwcoreSex' => 'http://rs.tdwg.org/dwc/dwcore/Sex',
    'curatorialIdentifiedBy' => 'http://rs.tdwg.org/dwc/curatorial/IdentifiedBy',
    'curatorialCollectorNumber' => 'http://rs.tdwg.org/dwc/curatorial/CollectorNumber',
    'curatorialFieldNumber' => 'http://rs.tdwg.org/dwc/curatorial/FieldNumber',
    'curatorialTypeStatus' => 'http://rs.tdwg.org/dwc/curatorial/TypeStatus',
    'curatorialIndividualCount' => 'http://rs.tdwg.org/dwc/curatorial/IndividualCount',
    'geospatialDecimalLatitude' => 'http://rs.tdwg.org/dwc/geospatial/DecimalLatitude',
    'geospatialDecimalLongitude' => 'http://rs.tdwg.org/dwc/geospatial/DecimalLongitude',
    'geospatialGeodeticDatum' => 'http://rs.tdwg.org/dwc/geospatial/GeodeticDatum',
    'darwin1DateLastModified' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/DateLastModified',
    'darwin1InstitutionCode' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/InstitutionCode',
    'darwin1CollectionCode' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/CollectionCode',
    'darwin1CatalogNumber' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/CatalogNumber',
    'darwin1ScientificName' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/ScientificName',
    'darwin1BasisOfRecord' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/BasisOfRecord',
    'darwin1Kingdom' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Kingdom',
    'darwin1Phylum' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Phylum',
    'darwin1Class' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Class',
    'darwin1Order' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Order',
    'darwin1Family' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Family',
    'darwin1Genus' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Genus',
    'darwin1Species' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Species',
    'darwin1Subspecies' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Subspecies',
    'darwin1ScientificNameAuthor' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/ScientificNameAuthor',
    'darwin1IdentifiedBy' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/IdentifiedBy',
    'darwin1YearIdentified' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/YearIdentified',
    'darwin1MonthIdentified' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/MonthIdentified',
    'darwin1DayIdentified' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/DayIdentified',
    'darwin1TypeStatus' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/TypeStatus',
    'darwin1CollectorNumber' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/CollectorNumber',
    'darwin1FieldNumber' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/FieldNumber',
    'darwin1Collector' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Collector',
    'darwin1YearCollected' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/YearCollected',
    'darwin1MonthCollected' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/MonthCollected',
    'darwin1DayCollected' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/DayCollected',
    'darwin1JulianDay' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/JulianDay',
    'darwin1TimeOfDay' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/TimeOfDay',
    'darwin1ContinentOcean' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/ContinentOcean',
    'darwin1Country' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Country',
    'darwin1StateProvince' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/StateProvince',
    'darwin1County' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/County',
    'darwin1Locality' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Locality',
    'darwin1Longitude' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Longitude',
    'darwin1Latitude' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Latitude',
    'darwin1CoordinatePrecision' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/CoordinatePrecision',
    'darwin1BoundingBox' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/BoundingBox',
    'darwin1MinimumElevation' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/MinimumElevation',
    'darwin1MaximumElevation' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/MaximumElevation',
    'darwin1MinimumDepth' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/MinimumDepth',
    'darwin1MaximumDepth' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/MaximumDepth',
    'darwin1Sex' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Sex',
    'darwin1PreparationType' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/PreparationType',
    'darwin1IndividualCount' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/IndividualCount',
    'darwin1PreviousCatalogNumber' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/PreviousCatalogNumber',
    'darwin1RelationshipType' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/RelationshipType',
    'darwin1RelatedCatalogItem' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/RelatedCatalogItem',
    'darwin1Notes' => 'http://digir.net/schema/conceptual/darwin/2003/1.0/Notes'
);
|
129 |
|
|
130 |
?> |
tapir2flatClient/trunk/client/tapirRequestTemplate.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# Builds the xml body to be sent to Tapir service. Only put in a |
|
4 |
# separate file because there is a lot of code-invariant text that would |
|
5 |
# just make other parts of the script difficult to read. |
|
6 |
############################################################################# |
|
7 |
|
|
8 |
require_once('configurableParams.php'); |
|
9 |
|
|
10 |
# Produces the <xs:element> declarations that go inside the "unitType"
# sequence of the output model.
#
# $supportedConcepts - array whose keys become the element names; the values
#                      are not used here.
#
# Returns one optional (minOccurs="0") xs:string element line per key, in
# array order, or an empty string when the array has no entries.
function buildStructure($supportedConcepts) {
    $declarations = array();

    foreach ($supportedConcepts as $elementName => $unusedConceptId) {
        $declarations[] =
            ' <xs:element name="' . $elementName . '" type="xs:string" minOccurs="0"/>' . "\n";
    }

    return implode('', $declarations);
}
|
18 |
|
|
19 |
# Produces the <node> entries of the output model's <mapping> section,
# tying each /records/record/<key> path to the concept that fills it.
#
# $supportedConcepts - array of key => concept id.
#
# Returns one three-line <node> entry per array element, in array order,
# or an empty string when the array has no entries.
#
# Both the key and the concept id are written into XML attribute values,
# so they are escaped first. Without this, a concept id containing '&',
# '<' or a double quote (e.g. a URI with a query string) would yield a
# request that is not well-formed XML. Values without such characters are
# emitted exactly as before.
function buildMap($supportedConcepts) {
    $mapStr = '';
    foreach ($supportedConcepts as $key => $conceptId) {
        $safeKey = htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8');
        $safeId  = htmlspecialchars((string) $conceptId, ENT_QUOTES, 'UTF-8');
        $mapStr .=
            " <node path=\"/records/record/$safeKey\">\n".
            " <concept id=\"$safeId\"/>\n".
            " </node>\n";
    }
    return $mapStr;
}
|
29 |
|
|
30 |
# Assembles the complete XML document for a TAPIR search request.
#
# $start             - value written into the <search> start attribute
# $limit             - value written into the <search> limit attribute
# $supportedConcepts - array handed to buildStructure() and buildMap() to
#                      generate the output model
#
# Returns the request as a string. The fixed text is kept as lists of
# lines; the generated parts (structure, mapping, filter) are spliced in
# between them. buildFilter() output is inserted as-is, so no newline
# separates it from the closing </filter> line.
function buildRequest($start,$limit,$supportedConcepts) {
    $eol = "\n";

    $beforeStructure = array(
        '<?xml version="1.0" encoding="UTF-8" ?>',
        '<request ',
        ' xmlns="http://rs.tdwg.org/tapir/1.0"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xsi:schemaLocation="http://rs.tdwg.org/tapir/1.0 ',
        ' http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd">',
        ' <header>',
        ' </header>',
        ' <search count="true" start="' . $start . '" limit="' . $limit . '" envelope="true">',
        ' <outputModel>',
        ' <structure>',
        ' <xs:schema targetNamespace="http://example.net/simple_specimen" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema http://www.w3.org/2001/XMLSchema.xsd">',
        ' <xs:element name="records">',
        ' <xs:complexType>',
        ' <xs:sequence>',
        ' <xs:element name="record" minOccurs="0" maxOccurs="unbounded" type="unitType">',
        ' </xs:element>',
        ' </xs:sequence>',
        ' </xs:complexType>',
        ' </xs:element>',
        ' <xs:complexType name="unitType">',
        ' <xs:sequence>',
    );

    $beforeMapping = array(
        ' </xs:sequence>',
        ' </xs:complexType>',
        ' </xs:schema>',
        ' </structure>',
        ' <indexingElement path="/records/record"/>',
        ' <mapping>',
    );

    $beforeFilter = array(
        ' </mapping>',
        ' </outputModel>',
        ' <filter>',
    );

    $closing = array(
        ' </filter>',
        ' </search>',
        '</request>',
    );

    $xmlRequest  = implode($eol, $beforeStructure) . $eol;
    $xmlRequest .= buildStructure($supportedConcepts);
    $xmlRequest .= implode($eol, $beforeMapping) . $eol;
    $xmlRequest .= buildMap($supportedConcepts);
    $xmlRequest .= implode($eol, $beforeFilter) . $eol;
    $xmlRequest .= buildFilter();
    $xmlRequest .= implode($eol, $closing) . $eol;

    return $xmlRequest;
}
|
72 |
|
|
73 |
# Returns the fixed XML document for a TAPIR capabilities request: the
# standard request envelope with an empty header and a single
# <capabilities /> operation. Takes no parameters.
function buildCapabilitiesRequest() {
    $documentLines = array(
        '<?xml version="1.0" encoding="UTF-8" ?>',
        '<request ',
        ' xmlns="http://rs.tdwg.org/tapir/1.0"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xsi:schemaLocation="http://rs.tdwg.org/tapir/1.0 ',
        ' http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd">',
        ' <header>',
        ' </header>',
        ' <capabilities />',
        '</request>',
    );

    # Every line, including the last one, ends with a newline.
    return implode("\n", $documentLines) . "\n";
}
|
87 |
|
|
88 |
?> |
|
89 |
|
tapir2flatClient/trunk/client/README | ||
---|---|---|
1 |
tapir2flat.php: a simple script to |
|
2 |
consume TAPIR web services and save the results in a delimited file, |
|
3 |
one record per line. The current delimiter is backtick '`' but |
|
4 |
that can be changed by changing the $seperator variable located |
|
5 |
in configurableParams.php. |
|
6 |
|
|
7 |
Command line usage: |

8 |
php tapir2flat.php |
|
9 |
|
|
10 |
dependencies: |
|
11 |
php 5+ |
|
12 |
pear |
|
13 |
HTTP_Request |
|
14 |
|
|
15 |
I had to install pear and HTTP_Request before it would work (on Ubuntu): |
|
16 |
sudo apt-get install php-http-request |
|
17 |
|
|
18 |
This should install all dependencies, but just in case, the full dependency list is: |
|
19 |
php-net-socket |
|
20 |
php-net-url |
|
21 |
php-pear |
|
22 |
php-http-request |
|
23 |
|
|
24 |
Necessary files: |
|
25 |
tapir2flat.php: Driver file |
|
26 |
configurableParams.php: File containing parameters that can be configured |
|
27 |
tapirRequestTemplate.php: Holds the template required to send a tapir request. |
|
28 |
getAllConcepts.php: holds utility function to get all possible return values from tapir service. |
|
29 |
|
|
30 |
Possible output files: |
|
31 |
specieslink.txt: Where the retrieved records will be stored. Each line represents one record, delimited by whatever is configured as the delimiter parameter. |
|
32 |
recordnum.dat: Holds the starting record number for the most recently sent request. |
|
33 |
error.log: Where any errors will be recorded. |
|
34 |
|
|
35 |
Possible configurations: see configurableParams.php as well. |
|
36 |
The url of desired tapir service |
|
37 |
Delimiter to be used to separate data fields within each record |
|
38 |
The filter sent to tapir service that specifies search parameters, in xml format. See http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47 for documentation on how to do this. |
|
39 |
|
tapir2flatClient/trunk/client/getAllConcepts.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# This file holds the utility function getAllConcepts which calls the |
|
4 |
# tapir service to find out all the concepts (data units) that are |
|
5 |
# supported. This list is necessary so that we know what to ask for |
|
6 |
# when making the request. |
|
7 |
############################################################################# |
|
8 |
|
|
9 |
require_once('HTTP/Request.php'); // pear package |
|
10 |
require_once('tapirRequestTemplate.php'); |
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
# Asks the TAPIR service for its capabilities and lists the concepts it maps.
#
# Reads the global $url, POSTs the document from buildCapabilitiesRequest()
# to it as text/xml, and collects the id attribute of every <mappedConcept>
# element in the response.
#
# Returns: on success, an array of key => concept id, where the key is the
#          concept id with '/' and '.' replaced by '_' and ':' removed.
#          On any failure, the integer 0 (callers test with is_array()).
#
# Side effects: every failure is appended to error.log in the working
# directory; transport and HTTP-status failures are also echoed.
function getAllConcepts() {
    global $url;

    $error_log_filename = "error.log";

    $http_request = new HTTP_Request();
    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( buildCapabilitiesRequest() );
    $http_request->setURL( $url );
    # NOTE(review): _timeout and _readTimeout are underscore-prefixed
    # (internal) members of HTTP_Request; confirm against the installed
    # PEAR version that assigning plain integers here has the intended
    # effect.
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    # sendRequest() reports transport problems (DNS, connect, timeout) by
    # returning a PEAR_Error; surface its message instead of falling
    # through to an "HTTP  code: Unknown Error" line with an empty code.
    $send_result = $http_request->sendRequest();
    if (class_exists('PEAR') && PEAR::isError($send_result)) {
        $message = "Request failed: " . $send_result->getMessage() . "\n" .
                   "while attempting to request supported concepts\n";
        file_put_contents($error_log_filename, $message, FILE_APPEND);
        echo( $message );
        return 0;
    }

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ( $code != 200 ) // 200 = OK
    {
        # Reason phrases for the non-200 codes the script knows about.
        $http_status_labels = array(
            201 => 'Created',
            202 => 'Accepted',
            203 => 'Non-Authoritative Information',
            204 => 'No Content',
            205 => 'Reset Content',
            206 => 'Partial Content',
            300 => 'Multiple Choices',
            301 => 'Moved Permanently',
            302 => 'Found',
            303 => 'See Other',
            304 => 'Not Modified',
            305 => 'Use Proxy',
            307 => 'Temporary Redirect',
            400 => 'Bad Request',
            401 => 'Unauthorized',
            402 => 'Payment Required',
            403 => 'Forbidden',
            404 => 'Not Found',
            405 => 'Method Not Allowed',
            406 => 'Not Acceptable',
            407 => 'Proxy Authentication Required',
            408 => 'Request Timeout',
            409 => 'Conflict',
            410 => 'Gone',
            411 => 'Length Required',
            412 => 'Precondition Failed',
            413 => 'Request Entity Too Large',
            414 => 'Request-URI Too Long',
            415 => 'Unsupported Media Type',
            416 => 'Requested Range Not Satisfiable',
            417 => 'Expectation Failed',
            500 => 'Internal Server Error',
            501 => 'Not Implemented',
            502 => 'Bad Gateway',
            503 => 'Service Unavailable',
            504 => 'Gateway Timeout',
            505 => 'HTTP Version Not Supported',
        );
        $label = isset($http_status_labels[$code]) ? $http_status_labels[$code] : 'Unknown Error';

        $message = "Service responded with HTTP ".$code." code: ".$label."\n".
                   "while attempting to request supported concepts\n";
        file_put_contents($error_log_filename, $message, FILE_APPEND);
        echo( $message );
        return 0;
    }

    # Weird encoding bug - the service can emit raw ^F (0x06) and ^L (0x0C)
    # control characters, which are illegal in XML and make the parser
    # choke. Each one is swapped for its decimal code as text ("6", "12").
    # The bytes are spelled as escapes so they survive editors and
    # copy/paste; a pattern that loses them becomes empty and would match
    # between every character of the response.
    if (!is_string($response) || $response === '') {
        file_put_contents($error_log_filename,
            "Empty response while attempting to request supported concepts\n", FILE_APPEND);
        return 0;
    }
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);

    $xmlDoc = new DOMDocument();
    if (!$xmlDoc->loadXML($response)) {
        file_put_contents($error_log_filename,
            "Response was not well-formed XML while attempting to request supported concepts\n",
            FILE_APPEND);
        return 0;
    }

    # Any <error> element in the response counts as a failed request.
    $errors = $xmlDoc->getElementsByTagName("error");
    if($errors->length > 0) {
        $error_text = '';
        foreach($errors as $error) {
            $error_text .= $error->nodeValue . "\n";
        }
        file_put_contents($error_log_filename, $error_text, FILE_APPEND);
        return 0;
    }

    $concepts = $xmlDoc->getElementsByTagName("mappedConcept");
    if($concepts->length == 0) {
        file_put_contents($error_log_filename,
            "No mappedConcept node, assuming there's a missed error.\n", FILE_APPEND);
        return 0;
    }

    $concepts_array = array();
    foreach($concepts as $concept) {
        $concept_id = $concept->getAttribute("id");
        # Derive the key from the id: '/' and '.' become '_', ':' is dropped.
        $concept_key = str_replace(array('/', '.', ':'), array('_', '_', ''), $concept_id);
        $concepts_array[$concept_key] = $concept_id;
    }
    return $concepts_array;
}
|
127 |
|
|
128 |
?> |
|
129 |
|
tapir2flatClient/trunk/client/tapir2flat.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# Driver file for tapir2flatClient, a simple script to |

4 |
# consume TAPIR web services and save the results in a delimited file, |
|
5 |
# one record per line. The current delimiter is backtick '`' but |
|
6 |
# that can be changed by changing the $seperator variable located |
|
7 |
# in configurableParams.php. |
|
8 |
# |
|
9 |
# Command line usage: |

10 |
# php tapir2flat.php |
|
11 |
# |
|
12 |
# dependencies: |
|
13 |
# php 5+ |
|
14 |
# pear |
|
15 |
# HTTP_Request |
|
16 |
# I had to install pear and HTTP_Request before it would work (on Ubuntu): |
|
17 |
# sudo apt-get install php-http-request |
|
18 |
# This should install all dependencies, but just in case, the full |
|
19 |
# dependency list is: |
|
20 |
# php-net-socket |
|
21 |
# php-net-url |
|
22 |
# php-pear |
|
23 |
# php-http-request |
|
24 |
############################################################################# |
|
25 |
|
|
26 |
require_once('HTTP/Request.php'); // pear package |
|
27 |
require_once('tapirRequestTemplate.php'); |
|
28 |
require_once('getAllConcepts.php'); |
|
29 |
|
|
30 |
# Driver: pages through the TAPIR service's search results and appends each
# record to the flat file, one line per record.
#
# Flow:
#   1. getAllConcepts() supplies the key => concept id map that defines
#      the columns.
#   2. The starting record is read from recordnum.dat when that file
#      exists, so an interrupted run resumes where it stopped.
#   3. Requests of $request_limit records are sent until a short page is
#      returned, the service reports no next record, or the start position
#      reaches the service's total.
#   4. Three failures in a row (transport, HTTP status, <error> element,
#      missing <summary>) abort the run; a single failure repeats the same
#      request.
#
# $url and $seperator come from configurableParams.php.
global $url;
global $seperator;

$flat_filename = "specieslink.txt";
$record_num_filename = "recordnum.dat";
$error_log_filename = "error.log";

$supportedConcepts = getAllConcepts();
if(!is_array($supportedConcepts)){
    file_put_contents($error_log_filename,
        "Unable to retrieve supported concepts from service, failing.\n", FILE_APPEND);
    exit;
}
$column_names = array_keys($supportedConcepts);

$successive_failures = 0;

$start_record = 0; // Record to begin first request (ordinal position, not ID)
if(file_exists($record_num_filename)) {
    # Cast so that an empty or whitespace-padded file resumes from 0
    # instead of sending start="" to the service.
    $start_record = (int) trim(file_get_contents($record_num_filename));
}

$request_limit = 1000; // Number of records per request; cannot exceed 1000

//According to TAPIR docs, the estimate from the service
//is an upper bound on the number of records. Start out high.
$estimated_max_from_service = 4000000;

if(!file_exists($flat_filename)) {
    # New output file: start it with a header line of column names.
    # implode() places the separator only between names, so this is
    # correct for separators of any length.
    file_put_contents($flat_filename, implode($seperator, $column_names) . "\n", FILE_APPEND);
}

# Reason phrases for the non-200 status codes the script knows about.
$http_status_labels = array(
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Found',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Timeout',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Long',
    415 => 'Unsupported Media Type',
    416 => 'Requested Range Not Satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Timeout',
    505 => 'HTTP Version Not Supported',
);

# Characters blanked out of every field value: the separator itself plus
# comma and double quote. Plain string replacement is used so a separator
# that is a regex metacharacter (e.g. '|' or '.') is treated literally.
$field_chars_to_blank = array($seperator, ',', '"');

$finished = false;
$itrNum = 0;

$http_request = new HTTP_Request();

while(!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record,$request_limit,$supportedConcepts);

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    # NOTE(review): _timeout and _readTimeout are underscore-prefixed
    # (internal) members of HTTP_Request; confirm against the installed
    # PEAR version that assigning plain integers here has the intended
    # effect.
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    $send_result = $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    # Work out whether this request failed and, if so, what to log.
    # Every kind of failure is handled in one place further down.
    $failure_messages = array();
    $xmlDoc = new DOMDocument();
    $summary = null;

    if (class_exists('PEAR') && PEAR::isError($send_result)) {
        # Transport-level failure (DNS, connect, timeout).
        $failure_messages[] = "Request failed: " . $send_result->getMessage();
    } elseif ( $code != 200 ) { // 200 = OK
        $label = isset($http_status_labels[$code]) ? $http_status_labels[$code] : 'Unknown Error';
        $failure_messages[] = "Service responded with HTTP ".$code." code: ".$label;
    } elseif (!is_string($response) || $response === '') {
        $failure_messages[] = "Service returned an empty response.";
    } else {
        # Weird encoding bug - the service can emit raw ^F (0x06) and ^L
        # (0x0C) control characters, which are illegal in XML and make the
        # parser choke. Each one is swapped for its decimal code as text.
        # The bytes are spelled as escapes so they survive editors and
        # copy/paste; a pattern that loses them becomes empty and would
        # match between every character of the response.
        $response = str_replace(array("\x06", "\x0C"), array("6", "12"), $response);

        if (!$xmlDoc->loadXML($response)) {
            $failure_messages[] = "Response was not well-formed XML.";
        } else {
            foreach($xmlDoc->getElementsByTagName("error") as $error) {
                $failure_messages[] = $error->nodeValue;
            }
            if (count($failure_messages) == 0) {
                $summary = $xmlDoc->getElementsByTagName("summary");
                if($summary->length == 0) {
                    $failure_messages[] = "No summary node, assuming there's a missed error.";
                }
            }
        }
    }

    if (count($failure_messages) > 0) {
        $successive_failures += 1;
        $give_up = ($successive_failures >= 3);
        if ($give_up) {
            $failure_messages[] = "3 successive failures, quitting";
        }
        file_put_contents($error_log_filename, implode("\n", $failure_messages) . "\n", FILE_APPEND);
        if ($give_up) {
            exit;
        }

        //Should repeat the same request
        continue;
    }

    $summary_node = $summary->item(0);
    $records = $xmlDoc->getElementsByTagName("record");

    # "next" may be missing when the service has nothing further to return.
    # In that case advance by the number of records received, so that
    # recordnum.dat always holds a usable number.
    $next_attr = $summary_node->getAttribute("next");
    if ($next_attr !== '') {
        $start_record_temp = (int) $next_attr;
    } else {
        $start_record_temp = $start_record + $records->length;
    }

    # Only adopt the service's total when it actually supplied one; an
    # empty value would otherwise become the loop bound.
    $total_attr = $summary_node->getAttribute("totalMatched");
    if ($total_attr !== '') {
        $estimated_max_from_service = (int) $total_attr;
    }

    $recordsStr = '';
    foreach($records as $record) {
        $fields = array();
        foreach($column_names as $key){
            $element = $record->getElementsByTagName($key);
            if($element->length > 0) {
                $fields[] = str_replace($field_chars_to_blank, ' ', $element->item(0)->nodeValue);
            } else {
                # Concept missing from this record: leave the column empty.
                $fields[] = '';
            }
        }
        $recordsStr .= implode($seperator, $fields) . "\n";
    }

    file_put_contents($flat_filename, $recordsStr, FILE_APPEND);

    $successive_failures = 0;

    # Persist the resume point only after the records have been written.
    file_put_contents($record_num_filename, (string) $start_record_temp);
    $start_record = $start_record_temp;

    //If number of records is less than request_limit, it means
    //the service is out of records. A missing "next" means the same.
    $finished = ($request_limit > $records->length) || ($next_attr === '');

    if($itrNum == 1) {
        print "Extimated number of records: $estimated_max_from_service\n";
    } elseif($itrNum % 10 == 0) {
        print "Pulled $start_record records out of $estimated_max_from_service ".
              "estimated total records.\n";
    }

}
|
244 |
|
|
245 |
?> |
|
246 |
|
Also available in: Unified diff
First Import