Revision 258
Added by Aaron Marcuse-Kubitza about 13 years ago
tapir2flatClient/trunk/client/tapirRequestTemplate.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# Builds the xml body to be sent to Tapir service. Only put in a |
|
4 |
# separate file because there is a lot of code-invariant text that would |
|
5 |
# just make other parts of the script difficult to read. |
|
6 |
############################################################################# |
|
7 |
|
|
8 |
require_once('configurableParams.php'); |
|
9 |
|
|
10 |
# Builds the <xs:element> declarations that describe one output record.
#
# $supportedConcepts: map whose keys become the element names; the values
#                     are not used here.
# Returns one declaration line per key, in array order ('' for an empty map).
function buildStructure($supportedConcepts) {
    $declarations = array();
    foreach (array_keys($supportedConcepts) as $elementName) {
        $declarations[] = sprintf(
            ' <xs:element name="%s" type="xs:string" minOccurs="0"/>' . "\n",
            $elementName
        );
    }
    return implode('', $declarations);
}
|
18 |
|
|
19 |
# Builds the <node> entries that map each output element to its concept id.
#
# $supportedConcepts: map of element name => concept id.
# Returns one three-line <node> entry per pair, in array order ('' for an
# empty map).
#
# Both values end up inside XML attributes, so they are XML-escaped first;
# a concept id containing '&' or '"' would otherwise make the whole request
# malformed. Values without special characters are emitted unchanged.
function buildMap($supportedConcepts) {
    $mapStr = '';
    foreach ($supportedConcepts as $key => $conceptId) {
        $safeKey = htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8');
        $safeConceptId = htmlspecialchars((string) $conceptId, ENT_QUOTES, 'UTF-8');
        $mapStr .=
            " <node path=\"/records/record/" . $safeKey . "\">\n" .
            " <concept id=\"" . $safeConceptId . "\"/>\n" .
            " </node>\n";
    }
    return $mapStr;
}
|
29 |
|
|
30 |
# Assembles the complete TAPIR search request document.
#
# $start:             value placed in the search element's start attribute.
# $limit:             value placed in the search element's limit attribute.
# $supportedConcepts: map of element name => concept id, passed to
#                     buildStructure() and buildMap().
# Returns the XML request as a string. The filter body comes from
# buildFilter().
function buildRequest($start,$limit,$supportedConcepts) {

    # Everything up to the point where the per-concept element
    # declarations are inserted.
    $beforeStructure = array(
        '<?xml version="1.0" encoding="UTF-8" ?>',
        '<request ',
        ' xmlns="http://rs.tdwg.org/tapir/1.0"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xsi:schemaLocation="http://rs.tdwg.org/tapir/1.0 ',
        ' http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd">',
        ' <header>',
        ' </header>',
        ' <search count="true" start="' . $start . '" limit="' . $limit . '" envelope="true">',
        ' <outputModel>',
        ' <structure>',
        ' <xs:schema targetNamespace="http://example.net/simple_specimen" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="http://www.w3.org/2001/XMLSchema http://www.w3.org/2001/XMLSchema.xsd">',
        ' <xs:element name="records">',
        ' <xs:complexType>',
        ' <xs:sequence>',
        ' <xs:element name="record" minOccurs="0" maxOccurs="unbounded" type="unitType">',
        ' </xs:element>',
        ' </xs:sequence>',
        ' </xs:complexType>',
        ' </xs:element>',
        ' <xs:complexType name="unitType">',
        ' <xs:sequence>',
    );

    # Closes the structure and opens the mapping section.
    $beforeMapping = array(
        ' </xs:sequence>',
        ' </xs:complexType>',
        ' </xs:schema>',
        ' </structure>',
        ' <indexingElement path="/records/record"/>',
        ' <mapping>',
    );

    # Closes the output model and opens the filter.
    $beforeFilter = array(
        ' </mapping>',
        ' </outputModel>',
        ' <filter>',
    );

    # Closes the filter, the search and the request.
    $afterFilter = array(
        ' </filter>',
        ' </search>',
        '</request>',
    );

    # The builders are called in document order; buildFilter() returns
    # its fragment without a trailing newline.
    return implode("\n", $beforeStructure) . "\n"
        . buildStructure($supportedConcepts)
        . implode("\n", $beforeMapping) . "\n"
        . buildMap($supportedConcepts)
        . implode("\n", $beforeFilter) . "\n"
        . buildFilter()
        . implode("\n", $afterFilter) . "\n";
}
|
72 |
|
|
73 |
# Returns the fixed XML document that asks a TAPIR service for its
# capabilities. Takes no input; every call yields the same string.
function buildCapabilitiesRequest() {
    $documentLines = array(
        '<?xml version="1.0" encoding="UTF-8" ?>',
        '<request ',
        ' xmlns="http://rs.tdwg.org/tapir/1.0"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xsi:schemaLocation="http://rs.tdwg.org/tapir/1.0 ',
        ' http://rs.tdwg.org/tapir/1.0/schema/tapir.xsd">',
        ' <header>',
        ' </header>',
        ' <capabilities />',
        '</request>',
    );

    # Every line, including the last, is newline-terminated.
    return implode("\n", $documentLines) . "\n";
}
|
87 |
|
|
88 |
?> |
|
89 |
|
tapir2flatClient/trunk/client/configurableParams.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# File containing all the parameters that could potentially need to be |
|
4 |
# configured for the job. |
|
5 |
# |
|
6 |
# $url: The url for the desired TAPIR service |
|
7 |
# $seperator: The delimiter for the flat file. |
|
8 |
# buildFilter(): Function to specify the search parameters. |
|
9 |
# desiredConcepts: Desired data to be returned by the service. |
|
10 |
# |
|
11 |
############################################################################# |
|
12 |
|
|
13 |
# Endpoint of the TAPIR service that every request is sent to.
$url = "http://tapir.cria.org.br/tapirlink/tapir.php/specieslink";

# Delimiter written between the data fields of each record in the
# flat file.
$seperator = "`";
|
19 |
|
|
20 |
# TAPIR filters are flexible enough that generating them from a handful
# of parameters is impractical, so the filter XML is written out by hand
# here. Edit the fragment below to change which records are requested.
# Filter syntax is documented at:
# http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47
#
# Returns the contents of the request's filter element as a single string
# with no whitespace between tags and no trailing newline.
function buildFilter() {
    return implode('', array(
        '<equals>',
        '<concept id="http://rs.tdwg.org/dwc/dwcore/Kingdom"/>',
        '<literal value="Plantae"/>',
        '</equals>',
    ));
}
|
35 |
|
|
36 |
?> |
tapir2flatClient/trunk/client/getAllConcepts.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# This file holds the utility function getAllConcepts which calls the |
|
4 |
# tapir service to find out all the concepts (data units) that are |
|
5 |
# supported. This list is necessary so that we know what to ask for |
|
6 |
# when making the request. |
|
7 |
############################################################################# |
|
8 |
|
|
9 |
require_once('HTTP/Request.php'); // pear package |
|
10 |
require_once('tapirRequestTemplate.php'); |
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
# Maps an HTTP status code to its standard reason phrase.
# Returns 'Unknown Error' for any code that is not in the table.
function getAllConceptsStatusLabel($code) {
    $labels = array(
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        307 => 'Temporary Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Request Entity Too Large',
        414 => 'Request-URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Requested Range Not Satisfiable',
        417 => 'Expectation Failed',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
    );

    if ((is_int($code) || is_string($code)) && isset($labels[$code])) {
        return $labels[$code];
    }
    return 'Unknown Error';
}

# Appends $message to the log file named $log_filename. If the file cannot
# be opened, the message is passed to PHP's own error_log() so it is not
# lost.
function getAllConceptsLogError($log_filename, $message) {
    $log = fopen($log_filename, "a");
    if ($log === false) {
        error_log(rtrim($message));
        return;
    }
    fwrite($log, $message);
    fclose($log);
}

# Sends a capabilities request to the service at the global $url and
# collects every mappedConcept element found in the response.
#
# Returns an array of key => concept id, where the key is the concept id
# with '/' and '.' turned into '_' and ':' removed. Returns 0 on any
# failure (non-200 status, unparsable body, an <error> element in the
# response, or no mappedConcept elements); the reason is appended to
# error.log.
function getAllConcepts() {
    global $url;

    $error_log_filename = "error.log";
    $http_request = new HTTP_Request();

    $body = buildCapabilitiesRequest();

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // To see the entire request while debugging:
    // echo "\n\n" . $http_request->_buildRequest();

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ( $code != 200 ) // 200 = OK
    {
        $message = "Service responded with HTTP ".$code." code: ".getAllConceptsStatusLabel($code)."\n".
                   "while attempting to request supported concepts\n";
        getAllConceptsLogError($error_log_filename, $message);
        echo( $message );
        return 0;
    }

    # Weird encoding bug: the response can contain raw ^F (0x06) and ^L
    # (0x0C) control characters, which the XML parser rejects. They are
    # replaced by the text "6" and "12". The bytes are written as escapes
    # because literal control characters do not survive copying; an empty
    # pattern would insert the replacement between every byte.
    # NOTE(review): presumably these stand for the numbers 6 and 12 -
    # confirm with the service maintainers.
    $response = str_replace(array("\x06", "\x0C"), array("6", "12"), (string) $response);

    $xmlDoc = new DOMDocument();
    if ($response === '' || !$xmlDoc->loadXML($response)) {
        getAllConceptsLogError($error_log_filename,
            "Unable to parse the capabilities response as XML.\n");
        return 0;
    }

    $errors = $xmlDoc->getElementsByTagName("error");
    if($errors->length > 0) {
        $error_text = '';
        foreach($errors as $error) {
            $error_text .= $error->nodeValue . "\n";
        }
        getAllConceptsLogError($error_log_filename, $error_text);
        return 0;
    }

    $concepts = $xmlDoc->getElementsByTagName("mappedConcept");
    if($concepts->length == 0) {
        getAllConceptsLogError($error_log_filename,
            "No mappedConcept node, assuming there's a missed error.\n");
        return 0;
    }

    $concepts_array = array();
    foreach($concepts as $concept) {
        $concept_id = $concept->getAttribute("id");
        # Turn the id into something usable as an XML element name.
        $concept_key = str_replace(array('/', '.', ':'), array('_', '_', ''), $concept_id);
        $concepts_array[$concept_key] = $concept_id;
    }
    return $concepts_array;
}
|
127 |
|
|
128 |
?> |
|
129 |
|
tapir2flatClient/trunk/client/README | ||
---|---|---|
1 |
tapir2flat.php: a simple script to |
|
2 |
consume TAPIR web services and save the results in a delimited file, |
|
3 |
one record per line. The current delimiter is backtick '`' but |
|
4 |
that can be changed by changing the $seperator variable located |
|
5 |
in configurableParams.php. |
|
6 |
|
|
7 |
Command line usage: |
|
8 |
php tapir2flat.php |
|
9 |
|
|
10 |
dependencies: |
|
11 |
php 5+ |
|
12 |
pear |
|
13 |
HTTP_Request |
|
14 |
|
|
15 |
I had to install pear and HTTP_Request before it would work (on Ubuntu): |
|
16 |
sudo apt-get install php-http-request |
|
17 |
|
|
18 |
This should install all dependencies, but just in case, the full dependency list is: |
|
19 |
php-net-socket |
|
20 |
php-net-url |
|
21 |
php-pear |
|
22 |
php-http-request |
|
23 |
|
|
24 |
Necessary files: |
|
25 |
tapir2flat.php: Driver file |
|
26 |
configurableParams.php: File containing parameters that can be configured |
|
27 |
tapirRequestTemplate.php: Holds the template required to send a tapir request. |
|
28 |
getAllConcepts.php: holds utility function to get all possible return values from tapir service. |
|
29 |
|
|
30 |
Possible output files: |
|
31 |
specieslink.txt: Where the retrieved records will be stored. Each line represents one record, with fields separated by the configured delimiter. |
|
32 |
recordnum.dat: Holds the starting record number for the most recently sent request. |
|
33 |
error.log: Where any errors will be recorded. |
|
34 |
|
|
35 |
Possible configurations: see configurableParams.php as well. |
|
36 |
The url of desired tapir service |
|
37 |
Delimiter to be used to separate data fields within each record |
|
38 |
The filter sent to tapir service that specifies search parameters, in xml format. See http://www.tdwg.org/dav/subgroups/tapir/1.0/docs/tdwg_tapir_specification_2010-05-05.htm#toc47 for documentation on how to do this. |
|
39 |
|
tapir2flatClient/trunk/client/tapir2flat.php | ||
---|---|---|
1 |
<?php |
|
2 |
############################################################################# |
|
3 |
# Driver file for tapir2flatClient, a simple script to |
|
4 |
# consume TAPIR web services and save the results in a delimited file, |
|
5 |
# one record per line. The current delimiter is backtick '`' but |
|
6 |
# that can be changed by changing the $seperator variable located |
|
7 |
# in configurableParams.php. |
|
8 |
# |
|
9 |
# Command line usage: |
|
10 |
# php tapir2flat.php |
|
11 |
# |
|
12 |
# dependencies: |
|
13 |
# php 5+ |
|
14 |
# pear |
|
15 |
# HTTP_Request |
|
16 |
# I had to install pear and HTTP_Request before it would work (on Ubuntu): |
|
17 |
# sudo apt-get install php-http-request |
|
18 |
# This should install all dependencies, but just in case, the full |
|
19 |
# dependency list is: |
|
20 |
# php-net-socket |
|
21 |
# php-net-url |
|
22 |
# php-pear |
|
23 |
# php-http-request |
|
24 |
############################################################################# |
|
25 |
|
|
26 |
require_once('HTTP/Request.php'); // pear package |
|
27 |
require_once('tapirRequestTemplate.php'); |
|
28 |
require_once('getAllConcepts.php'); |
|
29 |
|
|
30 |
global $url;
global $seperator;

$flat_filename = "specieslink.txt";
$record_num_filename = "recordnum.dat";
$error_log_filename = "error.log";

# Number of failed requests in a row after which the script gives up.
$max_successive_failures = 3;

# Appends $message to the log file named $log_filename. If the file cannot
# be opened, the message is passed to PHP's own error_log() so it is not
# lost.
function tapir2flatLogError($log_filename, $message) {
    $log = fopen($log_filename, "a");
    if ($log === false) {
        error_log(rtrim($message));
        return;
    }
    fwrite($log, $message);
    fclose($log);
}

# Maps an HTTP status code to its standard reason phrase.
# Returns 'Unknown Error' for any code that is not in the table.
function tapir2flatStatusLabel($code) {
    $labels = array(
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        307 => 'Temporary Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Request Entity Too Large',
        414 => 'Request-URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Requested Range Not Satisfiable',
        417 => 'Expectation Failed',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
    );

    if ((is_int($code) || is_string($code)) && isset($labels[$code])) {
        return $labels[$code];
    }
    return 'Unknown Error';
}

$supportedConcepts = getAllConcepts();
if(!is_array($supportedConcepts)){
    tapir2flatLogError($error_log_filename,
        "Unable to retrieve supported concepts from service, failing.\n");
    exit;
}
$concept_keys = array_keys($supportedConcepts);

$successive_failures = 0;

# Record to begin the first request at (ordinal position, not ID). A
# checkpoint left by an earlier run takes precedence; it is cast to int so
# that an empty or damaged file resumes from 0 instead of sending start="".
$start_record = 0;
if(file_exists($record_num_filename)) {
    $start_record = (int) trim((string) file_get_contents($record_num_filename));
}

$request_limit = 1000; // Number of records per request; cannot exceed 1000

//According to TAPIR docs, the estimate from the service
//is an upper bound on the number of records. Start out high.
$estimated_max_from_service = 4000000;

# A new output file starts with a header line naming every column.
# implode() places the separator only between fields, so this also works
# for separators longer than one character.
if(!file_exists($flat_filename)) {
    $file_header = implode($seperator, $concept_keys) . "\n";
    if (file_put_contents($flat_filename, $file_header, FILE_APPEND) === false) {
        tapir2flatLogError($error_log_filename, "Unable to write to $flat_filename, failing.\n");
        exit;
    }
}

$finished = false;
$itrNum = 0;

$http_request = new HTTP_Request();

while(!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    $body = buildRequest($start_record,$request_limit,$supportedConcepts);

    $http_request->setMethod( 'POST' );
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData( $body );
    $http_request->setURL( $url );
    $http_request->_timeout = 300;
    $http_request->_readTimeout = 300;

    // To see the entire request while debugging:
    // echo "\n\n" . $http_request->_buildRequest();

    $http_request->sendRequest();

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    # Work out whether this response is usable. $failure stays null on
    # success and otherwise holds the text destined for the error log.
    $failure = null;
    $summary = null;
    $xmlDoc = new DOMDocument();

    if ( $code != 200 ) // 200 = OK
    {
        $failure = "Service responded with HTTP ".$code." code: ".tapir2flatStatusLabel($code)."\n";
    } else {
        # Weird encoding bug: the response can contain raw ^F (0x06) and
        # ^L (0x0C) control characters, which the XML parser rejects. They
        # are replaced by the text "6" and "12". The bytes are written as
        # escapes because literal control characters do not survive
        # copying; an empty pattern would insert the replacement between
        # every byte.
        # NOTE(review): presumably these stand for the numbers 6 and 12 -
        # confirm with the service maintainers.
        $response = str_replace(array("\x06", "\x0C"), array("6", "12"), (string) $response);

        if ($response === '' || !$xmlDoc->loadXML($response)) {
            $failure = "Unable to parse the service response as XML.\n";
        } else {
            $errors = $xmlDoc->getElementsByTagName("error");
            $summary = $xmlDoc->getElementsByTagName("summary");
            if($errors->length > 0) {
                $failure = '';
                foreach($errors as $error) {
                    $failure .= $error->nodeValue . "\n";
                }
            } elseif($summary->length == 0) {
                $failure = "No summary node, assuming there's a missed error.\n";
            }
        }
    }

    if ($failure !== null) {
        $successive_failures += 1;
        $giving_up = ($successive_failures >= $max_successive_failures);
        if ($giving_up) {
            $failure .= "3 successive failures, quitting\n";
        }
        tapir2flatLogError($error_log_filename, $failure);
        if ($giving_up) {
            exit;
        }

        //Should repeat the same request
        continue;
    }

    $summary_node = $summary->item(0);
    $next_attr = $summary_node->getAttribute("next");
    $total_matched = $summary_node->getAttribute("totalMatched");

    # Only replace the estimate when the service actually reported a count;
    # an empty value would otherwise end the loop prematurely.
    if (is_numeric($total_matched)) {
        $estimated_max_from_service = (int) $total_matched;
    }

    $records = $xmlDoc->getElementsByTagName("record");
    $recordsStr = '';
    foreach($records as $record) {
        $fields = array();
        foreach($concept_keys as $key){
            $val = '';
            $element = $record->getElementsByTagName($key);
            if($element->length > 0) {
                # Blank out characters that would corrupt the flat file.
                # str_replace() treats the separator literally, so regex
                # metacharacters such as '|' or '.' are safe separators.
                $val = str_replace(array($seperator, ',', '"'), ' ',
                                   $element->item(0)->nodeValue);
            }
            $fields[] = $val;
        }
        $recordsStr .= implode($seperator, $fields) . "\n";
    }

    if (file_put_contents($flat_filename, $recordsStr, FILE_APPEND) === false) {
        # Stop before the checkpoint advances past records that were
        # never saved.
        tapir2flatLogError($error_log_filename, "Unable to write to $flat_filename, failing.\n");
        exit;
    }

    $successive_failures = 0;

    # NOTE(review): the summary's "next" attribute appears to be optional
    # and absent on the last page - confirm against the TAPIR spec. When it
    # is missing, treat the harvest as complete and checkpoint just past
    # the records received, so a rerun does not start again from 0.
    $has_next = is_numeric($next_attr);
    if ($has_next) {
        $start_record_temp = (int) $next_attr;
    } else {
        $start_record_temp = $start_record + $records->length;
    }

    file_put_contents($record_num_filename, (string) $start_record_temp);
    $start_record = $start_record_temp;

    //If number of records is less than request_limit, it means
    //the service is out of records.
    $finished = !$has_next || ($request_limit > $records->length);

    if($itrNum == 1) {
        print "Extimated number of records: $estimated_max_from_service\n";
    } elseif($itrNum % 10 == 0) {
        print "Pulled $start_record records out of $estimated_max_from_service ".
              "estimated total records.\n";
    }

}
|
244 |
|
|
245 |
?> |
|
246 |
|
makeVegxModel/trunk/util/findTiers.php | ||
---|---|---|
1 |
<?php |
|
2 |
|
|
3 |
# Node names that carry documentation or markup rather than model
# structure. Stored as name => true so membership is a key lookup.
$ignoreTypes = array_fill_keys(array(
    # doc: annotations
    "doc:description",
    "doc:example",
    "doc:moduleDescription",
    "doc:moduleDocs",
    "doc:moduleName",
    "doc:recommendedUsage",
    "doc:standAlone",
    "doc:summary",
    "doc:tooltip",
    # XML Schema bookkeeping elements
    "xsd:annotation",
    "xsd:appinfo",
    "xsd:documentation",
    "xsd:import",
    "xsd:include",
    "xsd:schema",
    # unprefixed names
    "AdditionalAbbreviations",
    "BacteriaStatus",
    "BioCodeStatus",
    "BotanyStatus",
    "li",
    "p",
    "para",
    "PreferredAbbreviation",
    "section",
    "Specification",
    "SuffixAlgae",
    "SuffixAnimalia",
    "SuffixBacteriae",
    "SuffixFungi",
    "SuffixPlantae",
    "title",
    "ul",
    "ZoologyStatus",
), true);
|
38 |
|
|
39 |
# Looks up $nodeName in the global $ignoreTypes table.
#
# Returns the stored value (true for every listed name) or null when the
# name is not listed. The isset() guard avoids the undefined-index
# notice/warning that a direct lookup raises for unknown names.
function isIgnoreType($nodeName) {
    global $ignoreTypes;
    return isset($ignoreTypes[$nodeName]) ? $ignoreTypes[$nodeName] : null;
}
|
43 |
|
|
44 |
# Schema files to scan, in processing order. The names are passed
# unchanged to DOMDocument::load().
$files = array(
    # EML modules
    "eml-access.xsd",
    "eml-coverage.xsd",
    "eml-documentation.xsd",
    "eml-literature.xsd",
    "eml-party.xsd",
    "eml-project.xsd",
    "eml-resource.xsd",
    "eml-text.xsd",
    # TCS and TDWG modules
    "tcsv101.xsd",
    "tdwg_basetypes.xsd",
    "tdwg_dw_element.xsd",
    "tdwg_dw_geospatial.xsd",
    # veg modules
    "veg-misc.xsd",
    "veg-organismobservation.xsd",
    "veg-plotobservation.xsd",
    "veg.xsd",
    "veg-plot.xsd",
);
|
62 |
|
|
63 |
|
|
64 |
# Reports whether $node has at least one descendant element whose name
# (with any "xs:" rewritten to "xsd:") is not an ignored type.
#
# $node: a DOM node that offers getElementsByTagName().
# Returns true at the first such descendant, false if there is none.
function hasImportantNodes($node) {
    foreach ($node->getElementsByTagName("*") as $descendant) {
        $normalizedName = preg_replace("/xs:/","xsd:",$descendant->nodeName);
        if (isIgnoreType($normalizedName)) {
            continue;
        }
        return true;
    }
    return false;
}
|
74 |
|
|
75 |
# Type names treated as primitives, mapped to a short description of the
# underlying schema type. Only the keys are consulted by the tier search.
$primitiveTypes = array(
    # constrained or composite primitives
    "DEFCATEGORIE"             => "enumerated string",
    "decimalLatitudeDataType"  => "xs:double [-90,90]",
    "decimalLongitudeDataType" => "xs:double [-180,180]",
    "spatialFitDataType"       => "xs:double (0 or >1 or undefined)",
    "PERCENT"                  => "xsd:decimal [0,100]",
    "RoleType"                 => "xs:string (union of enumerated string & unenumerated string)",
    "TaxonomicRankEnum"        => "xs:Name (union of several enumerated string types for one big list of enum values)",
    "TINYINT"                  => "xsd:decimal [-128,128]",
    "yearDate"                 => "xs:gYear xs:date (union of)",

    # built-in schema types
    "anyType"         => "xsd:anyType",
    "InlineType"      => "xs:anyType",
    "date"            => "xsd:date",
    "anyURI"          => "xs:anyURI",
    "decimal"         => "xsd:decimal",
    "gYear"           => "xsd:gYear",
    "string"          => "xs:string",
    "positiveInteger" => "xs:positiveInteger",
    "time"            => "xs:time",
    "double"          => "xs:double",
    "integer"         => "xs:integer",
    "Name"            => "xs:Name",
    "NMTOKEN"         => "xs:NMTOKEN",

    # named aliases of built-in types
    "DateTimeISO"                        => "xs:string",
    "dayOfYearDataType"                  => "xs:integer",
    "DescriptorType"                     => "xs:string",
    "FunctionType"                       => "xs:string",
    "GRingType"                          => "xs:anyType",
    "IDType"                             => "xs:string",
    "KeyTypeCode"                        => "xs:string",
    "MEDIUMINT"                          => "xs:decimal",
    "NomenclaturalCodesEnum"             => "xsd:Name",
    "NomenclaturalTypeStatusOfUnitsEnum" => "xsd:Name",
    "positiveDouble"                     => "xs:double",
    "probabilityType"                    => "xs:double",
    "ScopeType"                          => "xs:string",
    "SMALLINT"                           => "xs:decimal",
    "STRATUMINDEX"                       => "xs:string",
    "SystemType"                         => "xs:string",
    "TaxonomicRankAboveSuperfamilyEnum"  => "xsd:Name",
    "TaxonomicRankBelowSubspeciesEnum"   => "xsd:Name",
    "TaxonomicRankCultivatedPlants"      => "xsd:Name",
    "TaxonomicRankFamilyGroupEnum"       => "xsd:Name",
    "TaxonomicRankFamilySubdivisionEnum" => "xsd:Name",
    "TaxonomicRankGenusGroupEnum"        => "xsd:Name",
    "TaxonomicRankGenusSubdivisionEnum"  => "xsd:Name",
    "TaxonomicRankSpeciesGroupEnum"      => "xsd:Name",
);
|
123 |
|
|
124 |
$primitiveGroupings = array();
$primitiveGroupingsVariableLength = array();

# Same declarations the scan has always made, issued once instead of on
# every pass through the loop.
global $primitiveTypes;
global $primitiveGroupings;
global $primitiveGroupingsVariableLength;

# Find all primitive groupings: named types that are not primitives
# themselves and whose typed child elements are all primitives (or refer
# back to the type itself). A grouping counts as variable length when any
# descendant declares maxOccurs as 'unbounded' or greater than 1.
foreach($files as $schemaFile) {
    $schema = new DOMDocument();
    $schema->load($schemaFile);

    foreach($schema->getElementsByTagName("*") as $typeNode) {
        $tag = preg_replace("/xs:/","xsd:",$typeNode->nodeName);
        if(isIgnoreType($tag)) {
            continue;
        }
        if($tag != 'xsd:complexType' && $tag != 'xsd:simpleType') {
            continue;
        }

        $typeName = preg_replace("/.*:/","",$typeNode->getAttribute('name'));

        # Empty string is not a type, and known primitives need no
        # evaluation.
        if($typeName == '' || array_key_exists($typeName,$primitiveTypes)) {
            continue;
        }

        # Every typed child element must be a primitive or a
        # self-reference.
        $allChildrenPrimitive = true;
        foreach($typeNode->getElementsByTagName("element") as $child) {
            $childType = preg_replace("/.*:/","",$child->getAttribute('type'));
            if($childType != '' && $childType != $typeName &&
               !array_key_exists($childType,$primitiveTypes)) {
                $allChildrenPrimitive = false;
                break;
            }
        }
        if(!$allChildrenPrimitive) {
            continue;
        }

        # maxOccurs can show up in several child node types, so look for
        # it on every descendant.
        $isVariableLength = false;
        foreach($typeNode->getElementsByTagName("*") as $descendant) {
            $maxOccurs = $descendant->getAttribute('maxOccurs');
            if($maxOccurs == 'unbounded' || $maxOccurs > 1) {
                $isVariableLength = true;
                break;
            }
        }

        if($isVariableLength) {
            $primitiveGroupingsVariableLength[$typeName] = 1;
        } else {
            $primitiveGroupings[$typeName] = 1;
        }
    }
}

echo "primitive groupings:\n";
foreach(array_keys($primitiveGroupings) as $key) {
    echo $key . "\n";
}
echo "\n\n";

echo "primitive groupings variable length:\n";
foreach(array_keys($primitiveGroupingsVariableLength) as $key) {
    echo $key . "\n";
}
|
195 |
|
|
196 |
# Reports whether $name is a key in any of the arrays in $tiers.
#
# $name:  type name to look for.
# $tiers: list of arrays keyed by type name.
# Returns true when at least one tier has the key, false otherwise.
function alreadyFound($name,$tiers) {
    $isKnown = false;
    foreach($tiers as $tierMembers) {
        if(array_key_exists($name,$tierMembers)) {
            $isKnown = true;
            break;
        }
    }
    return $isKnown;
}
|
204 |
|
|
205 |
# Finds the types that belong to the next tier: named complex or simple
# types not present in any earlier tier, whose typed child elements all
# refer to earlier-tier types (or back to the type itself).
#
# $previousTiers: list of arrays keyed by type name, one per lower tier.
# Returns an array of type name => 1. Reads the schema files listed in the
# global $files.
function findTierEntities($previousTiers) {
    global $files;
    $tierMembers = array();

    foreach($files as $schemaFile) {
        $schema = new DOMDocument();
        $schema->load($schemaFile);

        foreach($schema->getElementsByTagName("*") as $candidate) {
            $tag = preg_replace("/xs:/","xsd:",$candidate->nodeName);
            if(isIgnoreType($tag)) {
                continue;
            }
            if($tag != 'xsd:complexType' && $tag != 'xsd:simpleType') {
                continue;
            }

            $typeName = preg_replace("/.*:/","",$candidate->getAttribute('name'));

            # Empty string is not a type, and anything already placed in
            # a lower tier needs no evaluation.
            if($typeName == '' || alreadyFound($typeName,$previousTiers)) {
                continue;
            }

            $dependsOnlyOnLowerTiers = true;
            foreach($candidate->getElementsByTagName("element") as $child) {
                $childType = preg_replace("/.*:/","",$child->getAttribute('type'));
                $isUnresolved = $childType != '' && $childType != $typeName &&
                                !alreadyFound($childType,$previousTiers);
                if($isUnresolved) {
                    $dependsOnlyOnLowerTiers = false;
                    break;
                }
            }

            if($dependsOnlyOnLowerTiers) {
                $tierMembers[$typeName] = 1;
            }
        }
    }
    return $tierMembers;
}
|
251 |
|
|
252 |
# Classify the remaining types tier by tier. Each pass may only depend on
# the primitives, the primitive groupings and the tiers found so far.
$prevTiers = array($primitiveTypes,$primitiveGroupings,$primitiveGroupingsVariableLength);

# Running union of every classified type name, used for the final count.
$tot = array_merge($primitiveTypes,$primitiveGroupings,$primitiveGroupingsVariableLength);

#5 is as deep as we get
$tierCount = 5;
$tiers = array();

for($tierNum = 1; $tierNum <= $tierCount; $tierNum++) {
    print "\n\n";
    $tiers[$tierNum] = findTierEntities($prevTiers);
    print "tier $tierNum entities:\n";
    foreach(array_keys($tiers[$tierNum]) as $key) {
        print $key . "\n";
    }

    $prevTiers[] = $tiers[$tierNum];
    # Every tier goes into the total, the deepest one included; the
    # hand-written merge this replaces stopped one tier short.
    $tot = array_merge($tot, $tiers[$tierNum]);
}

print count($tot) . "\n";
|
295 |
|
|
296 |
?> |
|
297 |
|
makeVegxModel/trunk/util/utilityFunctions.php | ||
---|---|---|
1 |
<?php |
|
2 |
require_once('objects/types.php'); |
|
3 |
|
|
4 |
# Looks $nodeName up in the global $ignoreTypes table.
#
# $nodeName: node name used as the array key.
# Returns the value stored for $nodeName, or false when the name is not
# listed (or the table has not been loaded).
function isIgnoreType($nodeName) {
    global $ignoreTypes;
    # isset() guards the lookup: indexing a missing key raises an
    # "undefined index" notice (a warning on PHP 8) for every unlisted name.
    return isset($ignoreTypes[$nodeName]) ? $ignoreTypes[$nodeName] : false;
}
|
8 |
|
|
9 |
# Looks $nodeName up in the global $primitiveTypes table.
#
# $nodeName: type name used as the array key.
# Returns the value stored for $nodeName, or false when the name is not
# listed (or the table has not been loaded).
function isPrimitiveType($nodeName) {
    global $primitiveTypes;
    # isset() avoids the "undefined index" notice/warning on unlisted names.
    return isset($primitiveTypes[$nodeName]) ? $primitiveTypes[$nodeName] : false;
}
|
13 |
|
|
14 |
# Looks $attrName up in the global $knownAttributes table.
#
# $attrName: attribute name used as the array key.
# Returns the value stored for $attrName, or false when the name is not
# listed (or the table has not been loaded).
function isKnownAttribute($attrName) {
    global $knownAttributes;
    # isset() avoids the "undefined index" notice/warning on unlisted names.
    return isset($knownAttributes[$attrName]) ? $knownAttributes[$attrName] : false;
}
|
18 |
|
|
19 |
# Looks $colName up in the global $indexableCols table.
#
# $colName: column name used as the array key.
# Returns the value stored for $colName, or false when the name is not
# listed (or the table has not been loaded).
function needsIndex($colName) {
    global $indexableCols;
    # isset() avoids the "undefined index" notice/warning on unlisted names.
    return isset($indexableCols[$colName]) ? $indexableCols[$colName] : false;
}
|
23 |
|
|
24 |
# Looks $colName up in the global $nativeVegXPointers table.
#
# $colName: column name used as the array key.
# Returns the value stored for $colName, or false when the name is not
# listed (or the table has not been loaded).
function isNativePointer($colName) {
    global $nativeVegXPointers;
    # isset() avoids the "undefined index" notice/warning on unlisted names.
    return isset($nativeVegXPointers[$colName]) ? $nativeVegXPointers[$colName] : false;
}
|
28 |
|
|
29 |
# Returns true as soon as one descendant element of $node has a name
# (namespace prefix stripped) that isIgnoreType() does not flag; returns
# false when every descendant is ignorable or there are none.
function hasImportantNodes($node) {
    foreach ($node->getElementsByTagName("*") as $descendant) {
        # Drop everything up to the last colon, e.g. "xs:element" -> "element".
        $localName = preg_replace("/.*:/", "", $descendant->nodeName);
        if (isIgnoreType($localName)) {
            continue;
        }
        return true;
    }
    return false;
}
|
39 |
|
|
40 |
# Serializes $node to an XML string using the document that owns it.
function stringXMLNode($node) {
    $document = $node->ownerDocument;
    $xml = $document->saveXML($node);
    return $xml;
}
|
43 |
|
|
44 |
# Walks up from $node and returns the 'name' attribute of the nearest
# ancestor element that has a non-empty one.
#
# $node: a DOM node attached to a document.
# Returns the attribute value, or '' when no ancestor element carries a
# non-empty 'name' attribute (previously this case ended in a fatal error
# from calling getAttribute() on null).
function findFirstAncestorName($node) {
    # One XPath evaluator is enough for the whole climb.
    $xpath = new DOMXPath($node->ownerDocument);
    $current = $node;
    while (true) {
        $parentNodes = $xpath->query("parent::*", $current);
        $parentNode = $parentNodes ? $parentNodes->item(0) : null;
        if ($parentNode === null) {
            # Reached the top of the tree without finding a named ancestor.
            return '';
        }
        $name = $parentNode->getAttribute('name');
        if ($name != '') {
            return $name;
        }
        $current = $parentNode;
    }
}
|
55 |
|
|
56 |
?> |
makeVegxModel/trunk/util/dataTypes.php | ||
---|---|---|
1 |
<?php |
|
2 |
$ignoreTypes = array( |
|
3 |
"description" => true, |
|
4 |
"example" => true, |
|
5 |
"moduleDescription" => true, |
|
6 |
"moduleDocs" => true, |
|
7 |
"moduleName" => true, |
|
8 |
"recommendedUsage" => true, |
|
9 |
"standAlone" => true, |
|
10 |
"summary" => true, |
|
11 |
"tooltip" => true, |
|
12 |
"annotation" => true, |
|
13 |
"appinfo" => true, |
|
14 |
"documentation" => true, |
|
15 |
"import" => true, |
|
16 |
"include" => true, |
|
17 |
"schema" => true, |
|
18 |
"AdditionalAbbreviations" => true, |
|
19 |
"BacteriaStatus" => true, |
|
20 |
"BioCodeStatus" => true, |
|
21 |
"BotanyStatus" => true, |
|
22 |
"li" => true, |
|
23 |
"p" => true, |
|
24 |
"para" => true, |
|
25 |
"PreferredAbbreviation" => true, |
|
26 |
"section" => true, |
|
27 |
"Specification" => true, |
|
28 |
"SuffixAlgae" => true, |
|
29 |
"SuffixAnimalia" => true, |
|
30 |
"SuffixBacteriae" => true, |
|
31 |
"SuffixFungi" => true, |
|
32 |
"SuffixFungi" => true, |
|
33 |
"SuffixPlantae" => true, |
|
34 |
"title" => true, |
|
35 |
"ul" => true, |
|
36 |
"#text" => true, |
|
37 |
"#comment" => true, |
|
38 |
"ZoologyStatus" => true |
|
39 |
); |
|
40 |
|
|
41 |
# Looks $nodeName up in the global $ignoreTypes table.
#
# $nodeName: node name (namespace prefix already stripped by the callers
#            in this script) used as the array key.
# Returns the value stored for $nodeName, or false when the name is not
# listed.
function isIgnoreType($nodeName) {
    global $ignoreTypes;
    # isset() guards the lookup: indexing a missing key raises an
    # "undefined index" notice (a warning on PHP 8) for every unlisted name.
    return isset($ignoreTypes[$nodeName]) ? $ignoreTypes[$nodeName] : false;
}
|
45 |
|
|
46 |
$files = array( |
|
47 |
'eml-access.xsd', |
|
48 |
'eml-coverage.xsd', |
|
49 |
'eml-documentation.xsd', |
|
50 |
'eml-literature.xsd', |
|
51 |
'eml-party.xsd', |
|
52 |
'eml-project.xsd', |
|
53 |
'eml-resource.xsd', |
|
54 |
'eml-text.xsd', |
|
55 |
'tcsv101.xsd', |
|
56 |
'tdwg_basetypes.xsd', |
|
57 |
'tdwg_dw_element.xsd', |
|
58 |
'tdwg_dw_geospatial.xsd', |
|
59 |
'veg-misc.xsd', |
|
60 |
'veg-organismobservation.xsd', |
|
61 |
'veg-plotobservation.xsd', |
|
62 |
'veg.xsd', |
|
63 |
'veg-plot.xsd'); |
|
64 |
|
|
65 |
|
|
66 |
# Returns true as soon as one descendant element of $node has a name
# (namespace prefix stripped) that isIgnoreType() does not flag; returns
# false when every descendant is ignorable or there are none.
function hasImportantNodes($node) {
    foreach ($node->getElementsByTagName("*") as $descendant) {
        # Drop everything up to the last colon, e.g. "xs:element" -> "element".
        $localName = preg_replace("/.*:/", "", $descendant->nodeName);
        if (isIgnoreType($localName)) {
            continue;
        }
        return true;
    }
    return false;
}
|
76 |
|
|
77 |
# For every schema file, print the data type referenced by each
# non-ignored 'attribute' node: its 'base' attribute when non-empty,
# otherwise its 'type' attribute when non-empty. Namespace prefixes are
# stripped from node names and from the printed type names.
foreach ($files as $file) {
    $xmlDoc = new DOMDocument();
    $xmlDoc->load($file);
    $nodes = $xmlDoc->getElementsByTagName("*");
    foreach ($nodes as $node) {
        $nodeName = preg_replace("/.*:/", "", $node->nodeName);
        # The original also carried a commented-out variant of the test below
        # that matched 'element', 'restriction' and 'extension' nodes.
        if (isIgnoreType($nodeName) || $nodeName != 'attribute') {
            continue;
        }
        $baseName = preg_replace("/.*:/", "", $node->getAttribute('base'));
        $typeName = preg_replace("/.*:/", "", $node->getAttribute('type'));
        if ($baseName != '') {
            print $baseName . "\n";
            continue;
        }
        if ($typeName != '') {
            print $typeName . "\n";
        }
    }
}
|
99 |
?> |
|
100 |
|
makeVegxModel/trunk/util/typeBreakdown.php | ||
---|---|---|
1 |
<?php |
|
2 |
$primitiveTypes = array( |
|
3 |
'DEFCATEGORIE' => 'enumerated string', |
|
4 |
'decimalLatitudeDataType' => 'xs:double [-90,90]', |
|
5 |
'decimalLongitudeDataType' => 'xs:double [-180,180]', |
|
6 |
'spatialFitDataType' => 'xs:double (0 or >1 or undefined)', |
|
7 |
'PERCENT' => 'xsd:decimal [0,100]', |
|
8 |
'RoleType' => 'xs:string (union of enumerated string & unenumerated string)', |
|
9 |
'TaxonomicRankEnum' => 'xs:string (union of several enumerated string types for one big list of enum values)', |
|
10 |
'TINYINT' => 'xsd:decimal [-128,128]', |
|
11 |
'yearDate' => 'xs:gYear xs:date (union of)', |
|
12 |
'anyType' => 'xsd:anyType', |
|
13 |
'date' => 'xsd:date', |
|
14 |
'anyURI' => 'xs:anyURI', |
|
15 |
'decimal' => 'xsd:decimal', |
|
16 |
'gYear' => 'xsd:gYear', |
|
17 |
'string' => 'xs:string', |
|
18 |
'positiveInteger' => 'xs:positiveInteger', |
|
19 |
'time' => 'xs:time', |
|
20 |
'Name' => 'xsd:Name', |
|
21 |
'double' => 'xs:double', |
|
22 |
'integer' => 'xs:integer', |
|
23 |
'NMTOKEN' => 'xs:NMTOKEN', |
|
24 |
|
|
25 |
); |
|
26 |
|
|
27 |
$primitiveGroupings = array( |
|
28 |
'GRingPointType' => array('gRingLatitude' => 'xs:string', 'gRingLongitude' => 'xs:string'), |
|
29 |
'simpleUserdefinedType' => array('name' => 'xs:string', 'value' => 'xs:string', 'methodID' => 'xs:string'), |
|
30 |
'partyWithRoleType' => array('partyID' => 'xs:anyType', 'role' => 'xs:anyType'), |
|
31 |
'ReferenceType' => array('attribute_ref (op)' => 'xsd:token', 'attribute_linkType (op)' => 'enumerated string'), |
|
32 |
'relativeSpatialCoordinateType' => array('relativeX' => 'xsd:decimal', 'relativeY' => 'xsd:decimal', 'relativeZ (op)' => 'xsd:decimal'), |
|
33 |
'TaxonomicRank' => array('text' => 'xs:string', 'attribute_code' => 'TaxonomicRankEnum (op)'), |
|
34 |
'RelationshipType' => array('text' => 'xs:text (maybe)', 'attribute_type' => 'enumerated string (required)'), |
|
35 |
); |
|
36 |
|
|
37 |
$primitiveGroupingsVariableLength = array( |
|
38 |
'AccessRule' => array('principal' => 'xs:string', 'permission' => 'xs:string'), |
|
39 |
'Person' => array('salutation' => 'xs:string (unbounded)', 'givenName' => 'xs:string (unbounded)', 'surName' => 'xs:string'), |
|
40 |
'PlaceholderType' => array('any' => 'xs:any (unbounded)', 'anyAttribute' => 'xs:string (likely unbounded)'), |
|
41 |
'SubSuperScriptType' => array('text' => 'xs:string', 'subscript' => 'SubSuperScriptType (ch) (unbounded)', 'superscript' => 'SubSuperScriptType (ch) (unbounded)'), |
|
42 |
'TaxonomicClassificationType' => array('taxonRankName' => 'xs:string (op)', 'taxonRankValue' => 'xs:string (op)', 'commonName' => 'xs:string (op) (unbounded)', 'taxonomicClassification' => 'TaxonomicClassificationType (op) (unbounded)'), |
|
43 |
); |
|
44 |
|
|
45 |
$complexTypes = array( |
|
46 |
'AccessType' => 1, |
|
47 |
); |
|
48 |
|
|
49 |
?> |
makeVegxModel/trunk/util/allUsedTypes.php | ||
---|---|---|
1 |
<?php |
|
2 |
$allUsedTypes = array( |
|
3 |
'AccessRule' => 1, |
|
4 |
'AccessType' => 1, |
|
5 |
'AccordingToType' => 1, |
|
6 |
'Address' => 1, |
|
7 |
'AgentNames' => 1, |
|
8 |
'anyType' => 1, |
|
9 |
'anyURI' => 1, |
|
10 |
'Article' => 1, |
|
11 |
'AudioVisual' => 1, |
|
12 |
'Book' => 1, |
|
13 |
'CanonicalAuthorship' => 1, |
|
14 |
'CanonicalName' => 1, |
|
15 |
'Chapter' => 1, |
|
16 |
'CitationType' => 1, |
|
17 |
'complexUserDefinedType' => 1, |
|
18 |
'ConferenceProceedings' => 1, |
|
19 |
'ConnectionDefinitionType' => 1, |
|
20 |
'Coverage' => 1, |
|
21 |
'date' => 1, |
|
22 |
'decimal' => 1, |
|
23 |
'decimalLatitudeDataType' => 1, |
|
24 |
'decimalLongitudeDataType' => 1, |
|
25 |
'DEFCATEGORIE' => 1, |
|
26 |
'DistributionType' => 1, |
|
27 |
'double' => 1, |
|
28 |
'Generic' => 1, |
|
29 |
'GeographicCoverage' => 1, |
|
30 |
'geospatialType' => 1, |
|
31 |
'GRingPointType' => 1, |
|
32 |
'gYear' => 1, |
|
33 |
'InlineType' => 1, |
|
34 |
'integer' => 1, |
|
35 |
'ListType' => 1, |
|
36 |
'Manuscript' => 1, |
|
37 |
'Map' => 1, |
|
38 |
'MeasurementType' => 1, |
|
39 |
'Name' => 1, |
|
40 |
'NameCitation' => 1, |
|
41 |
'NMTOKEN' => 1, |
|
42 |
'NomenclaturalNoteType' => 1, |
|
43 |
'noteType' => 1, |
|
44 |
'ParagraphType' => 1, |
|
45 |
'partyWithRoleType' => 1, |
|
46 |
'PERCENT' => 1, |
|
47 |
'percentCoverType' => 1, |
|
48 |
'Person' => 1, |
|
49 |
'PersonalCommunication' => 1, |
|
50 |
'PlaceholderType' => 1, |
|
51 |
'positiveInteger' => 1, |
|
52 |
'Presentation' => 1, |
|
53 |
'ReferenceType' => 1, |
|
54 |
'relatedItemType' => 1, |
|
55 |
'relatedPlotType' => 1, |
|
56 |
'RelationshipType' => 1, |
|
57 |
'relativeSpatialCoordinateType' => 1, |
|
58 |
'Report' => 1, |
|
59 |
'ResearchProjectType' => 1, |
|
60 |
'ResponsibleParty' => 1, |
|
61 |
'RoleType' => 1, |
|
62 |
'ScientificName' => 1, |
|
63 |
'SectionType' => 1, |
|
64 |
'simpleUserdefinedType' => 1, |
|
65 |
'SingleDateTimeType' => 1, |
|
66 |
'spatialFitDataType' => 1, |
|
67 |
'string' => 1, |
|
68 |
'SubSuperScriptType' => 1, |
|
69 |
'TaxonConceptType' => 1, |
|
70 |
'TaxonomicClassificationType' => 1, |
|
71 |
'TaxonomicCoverage' => 1, |
|
72 |
'TaxonomicRank' => 1, |
|
73 |
'TemporalCoverage' => 1, |
|
74 |
'TextType' => 1, |
|
75 |
'Thesis' => 1, |
|
76 |
'time' => 1, |
|
77 |
'TINYINT' => 1, |
|
78 |
'vegetationStructureType' => 1, |
|
79 |
'yearDate' => 1 |
|
80 |
); |
|
81 |
|
|
82 |
?> |
makeVegxModel/trunk/util/nodeCount.php | ||
---|---|---|
1 |
<?php |
|
2 |
$ignoreTypes = array( |
|
3 |
"doc:description" => true, |
|
4 |
"doc:example" => true, |
|
5 |
"doc:moduleDescription" => true, |
|
6 |
"doc:moduleDocs" => true, |
|
7 |
"doc:moduleName" => true, |
|
8 |
"doc:recommendedUsage" => true, |
|
9 |
"doc:standAlone" => true, |
|
10 |
"doc:summary" => true, |
|
11 |
"doc:tooltip" => true, |
|
12 |
"xsd:annotation" => true, |
|
13 |
"xsd:appinfo" => true, |
|
14 |
"xsd:documentation" => true, |
|
15 |
"xsd:import" => true, |
|
16 |
"xsd:include" => true, |
|
17 |
"xsd:schema" => true, |
|
18 |
"AdditionalAbbreviations" => true, |
|
19 |
"BacteriaStatus" => true, |
|
20 |
"BioCodeStatus" => true, |
|
21 |
"BotanyStatus" => true, |
|
22 |
"li" => true, |
|
23 |
"p" => true, |
|
24 |
"para" => true, |
|
25 |
"PreferredAbbreviation" => true, |
|
26 |
"section" => true, |
|
27 |
"Specification" => true, |
|
28 |
"SuffixAlgae" => true, |
|
29 |
"SuffixAnimalia" => true, |
|
30 |
"SuffixBacteriae" => true, |
|
31 |
"SuffixFungi" => true, |
|
32 |
"SuffixFungi" => true, |
|
33 |
"SuffixPlantae" => true, |
|
34 |
"title" => true, |
|
35 |
"ul" => true, |
|
36 |
"ZoologyStatus" => true); |
|
37 |
|
|
38 |
# Looks $nodeName up in the global $ignoreTypes table.
#
# $nodeName: node name used as the array key (this script's table uses
#            prefixed keys such as "xsd:annotation").
# Returns the value stored for $nodeName, or false when the name is not
# listed.
function isIgnoreType($nodeName) {
    global $ignoreTypes;
    # isset() guards the lookup: indexing a missing key raises an
    # "undefined index" notice (a warning on PHP 8) for every unlisted name.
    return isset($ignoreTypes[$nodeName]) ? $ignoreTypes[$nodeName] : false;
}
|
42 |
|
|
43 |
$files = array( |
|
44 |
'eml-access.xsd', |
|
45 |
'eml-coverage.xsd', |
|
46 |
'eml-documentation.xsd', |
|
47 |
'eml-literature.xsd', |
|
48 |
'eml-party.xsd', |
|
49 |
'eml-project.xsd', |
|
50 |
'eml-resource.xsd', |
|
51 |
'eml-text.xsd', |
|
52 |
'tcsv101.xsd', |
|
53 |
'tdwg_basetypes.xsd', |
|
54 |
'tdwg_dw_element.xsd', |
|
55 |
'tdwg_dw_geospatial.xsd', |
|
56 |
'veg-misc.xsd', |
|
57 |
'veg-organismobservation.xsd', |
|
58 |
'veg-plotobservation.xsd', |
|
59 |
'veg-plot.xsd'); |
|
60 |
# For every schema file, print the name of each element that
# isIgnoreType() does not flag, one per line. Any "xs:" in a node name is
# rewritten to "xsd:" before the lookup and the print.
foreach ($files as $file) {
    $xmlDoc = new DOMDocument();
    $xmlDoc->load($file);
    $nodes = $xmlDoc->getElementsByTagName("*");
    foreach ($nodes as $node) {
        $nodeName = preg_replace("/xs:/", "xsd:", $node->nodeName);
        if (isIgnoreType($nodeName)) {
            continue;
        }
        print $nodeName . "\n";
    }
}
|
71 |
?> |
makeVegxModel/trunk/util/parseVegx.php | ||
---|---|---|
1 |
<?php |
|
2 |
$ignoreTypes = array( |
|
3 |
"doc:description" => true, |
|
4 |
"doc:example" => true, |
|
5 |
"doc:moduleDescription" => true, |
|
6 |
"doc:moduleDocs" => true, |
|
7 |
"doc:moduleName" => true, |
|
8 |
"doc:recommendedUsage" => true, |
|
9 |
"doc:standAlone" => true, |
|
10 |
"doc:summary" => true, |
|
11 |
"doc:tooltip" => true, |
|
12 |
"xsd:annotation" => true, |
|
13 |
"xsd:appinfo" => true, |
|
14 |
"xsd:documentation" => true, |
|
15 |
"xsd:import" => true, |
|
16 |
"xsd:include" => true, |
|
17 |
"xsd:schema" => true, |
|
18 |
"AdditionalAbbreviations" => true, |
|
19 |
"BacteriaStatus" => true, |
|
20 |
"BioCodeStatus" => true, |
|
21 |
"BotanyStatus" => true, |
|
22 |
"li" => true, |
|
23 |
"p" => true, |
|
24 |
"para" => true, |
|
25 |
"PreferredAbbreviation" => true, |
|
26 |
"section" => true, |
|
27 |
"Specification" => true, |
|
28 |
"SuffixAlgae" => true, |
|
29 |
"SuffixAnimalia" => true, |
|
30 |
"SuffixBacteriae" => true, |
|
31 |
"SuffixFungi" => true, |
|
32 |
"SuffixFungi" => true, |
|
33 |
"SuffixPlantae" => true, |
|
34 |
"title" => true, |
|
35 |
"ul" => true, |
|
36 |
"ZoologyStatus" => true); |
|
37 |
|
|
38 |
# Looks $nodeName up in the global $ignoreTypes table.
#
# $nodeName: node name used as the array key (this script's table uses
#            prefixed keys such as "xsd:annotation").
# Returns the value stored for $nodeName, or false when the name is not
# listed.
function isIgnoreType($nodeName) {
    global $ignoreTypes;
    # isset() guards the lookup: indexing a missing key raises an
    # "undefined index" notice (a warning on PHP 8) for every unlisted name.
    return isset($ignoreTypes[$nodeName]) ? $ignoreTypes[$nodeName] : false;
}
|
42 |
|
|
43 |
$files = array( |
|
44 |
'eml-access.xsd', |
|
45 |
'eml-coverage.xsd', |
|
46 |
'eml-documentation.xsd', |
|
47 |
'eml-literature.xsd', |
|
48 |
'eml-party.xsd', |
|
49 |
'eml-project.xsd', |
|
50 |
'eml-resource.xsd', |
|
51 |
'eml-text.xsd', |
|
52 |
'tcsv101.xsd', |
|
53 |
'tdwg_basetypes.xsd', |
|
54 |
'tdwg_dw_element.xsd', |
|
55 |
'tdwg_dw_geospatial.xsd', |
|
56 |
'veg-misc.xsd', |
|
57 |
'veg-organismobservation.xsd', |
|
58 |
'veg-plotobservation.xsd', |
|
59 |
'veg-plot.xsd'); |
|
60 |
|
|
61 |
|
|
62 |
# Returns true as soon as one descendant element of $node has a name
# (with "xs:" rewritten to "xsd:") that isIgnoreType() does not flag;
# returns false when every descendant is ignorable or there are none.
function hasImportantNodes($node) {
    foreach ($node->getElementsByTagName("*") as $descendant) {
        $normalizedName = preg_replace("/xs:/", "xsd:", $descendant->nodeName);
        if (isIgnoreType($normalizedName)) {
            continue;
        }
        return true;
    }
    return false;
}
|
72 |
|
|
73 |
# For every schema file, report each 'xsd:element' that has a name, has no
# 'type' attribute, and contains at least one non-ignored descendant:
# print a header line with the file and element name, followed by the
# node names of all of its descendant elements.
foreach ($files as $file) {
    $xmlDoc = new DOMDocument();
    $xmlDoc->load($file);
    $nodes = $xmlDoc->getElementsByTagName("*");
    foreach ($nodes as $node) {
        $nodeName = preg_replace("/xs:/", "xsd:", $node->nodeName);
        if (isIgnoreType($nodeName)) {
            continue;
        }
        # The original carried a commented-out extension of this test that
        # would also have accepted 'xsd:complexType'.
        if ($nodeName != 'xsd:element') {
            continue;
        }
        $attrName = $node->getAttribute('name');
        if ($attrName == '' || $node->getAttribute('type') != '') {
            continue;
        }
        if (!hasImportantNodes($node)) {
            continue;
        }
        $poNodes = $node->getElementsByTagName("*");
        print "$file: <$nodeName $attrName:\n ";
        foreach ($poNodes as $poNode) {
            print " " . $poNode->nodeName . "\n";
        }
    }
}
|
93 |
?> |
|
94 |
|
makeVegxModel/trunk/eml-project.xsd | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:doc="eml://ecoinformatics.org/documentation-2.0.1" xmlns:txt="eml://ecoinformatics.org/text-2.0.1" xmlns:prot="eml://ecoinformatics.org/protocol-2.0.1" xmlns:rp="eml://ecoinformatics.org/party-2.0.1" xmlns:cov="eml://ecoinformatics.org/coverage-2.0.1" xmlns:cit="eml://ecoinformatics.org/literature-2.0.1" xmlns:res="eml://ecoinformatics.org/resource-2.0.1" xmlns="eml://ecoinformatics.org/project-2.0.1" targetNamespace="eml://ecoinformatics.org/project-2.0.1"> |
|
3 |
<xs:import namespace="eml://ecoinformatics.org/documentation-2.0.1" schemaLocation="eml-documentation.xsd"/> |
|
4 |
<xs:import namespace="eml://ecoinformatics.org/resource-2.0.1" schemaLocation="eml-resource.xsd"/> |
|
5 |
<xs:import namespace="eml://ecoinformatics.org/party-2.0.1" schemaLocation="eml-party.xsd"/> |
|
6 |
<xs:import namespace="eml://ecoinformatics.org/coverage-2.0.1" schemaLocation="eml-coverage.xsd"/> |
|
7 |
<xs:import namespace="eml://ecoinformatics.org/literature-2.0.1" schemaLocation="eml-literature.xsd"/> |
|
8 |
<xs:import namespace="eml://ecoinformatics.org/text-2.0.1" schemaLocation="eml-text.xsd"/> |
|
9 |
<xs:annotation> |
|
10 |
<xs:documentation>'$RCSfile: eml-project.xsd,v $' |
|
11 |
Copyright: 1997-2002 Regents of the University of California, |
|
12 |
University of New Mexico, and |
|
13 |
Arizona State University |
|
14 |
Sponsors: National Center for Ecological Analysis and Synthesis and |
|
15 |
Partnership for Interdisciplinary Studies of Coastal Oceans, |
|
16 |
University of California Santa Barbara |
|
17 |
Long-Term Ecological Research Network Office, |
|
18 |
University of New Mexico |
|
19 |
Center for Environmental Studies, Arizona State University |
|
20 |
Other funding: National Science Foundation (see README for details) |
|
21 |
The David and Lucile Packard Foundation |
|
22 |
For Details: http://knb.ecoinformatics.org/ |
|
23 |
|
|
24 |
'$Author: jones $' |
|
25 |
'$Date: 2003/11/06 20:15:21 $' |
|
26 |
'$Revision: 1.77 $' |
|
27 |
</xs:documentation> |
|
28 |
<xs:appinfo> |
|
29 |
</xs:appinfo> |
|
30 |
</xs:annotation> |
|
31 |
<xs:element name="researchProject" type="ResearchProjectType"> |
|
32 |
<xs:annotation> |
|
33 |
<xs:appinfo> |
|
34 |
<doc:tooltip>research project</doc:tooltip> |
|
35 |
<doc:summary>The root element of this module.</doc:summary> |
|
36 |
</xs:appinfo> |
|
37 |
</xs:annotation> |
|
38 |
</xs:element> |
|
39 |
<xs:complexType name="ResearchProjectType"> |
|
40 |
<xs:annotation> |
|
41 |
<xs:appinfo> |
|
42 |
<doc:tooltip>Research project descriptor</doc:tooltip> |
|
43 |
<doc:summary>Descriptor of a research context for a dataset or another |
|
44 |
project.</doc:summary> |
|
45 |
</xs:appinfo> |
|
46 |
</xs:annotation> |
|
47 |
<xs:choice> |
|
48 |
<xs:sequence> |
|
49 |
<xs:element name="title" type="xs:string" maxOccurs="unbounded"> |
|
50 |
<xs:annotation> |
|
51 |
<xs:appinfo> |
|
52 |
<doc:tooltip>Project Title</doc:tooltip> |
|
53 |
<doc:summary>Title of the project.</doc:summary> |
|
54 |
</xs:appinfo> |
|
55 |
</xs:annotation> |
|
56 |
</xs:element> |
|
57 |
<xs:element name="personnel" maxOccurs="unbounded"> |
|
58 |
<xs:annotation> |
|
59 |
<xs:appinfo> |
|
60 |
<doc:tooltip>Personnel</doc:tooltip> |
|
61 |
<doc:summary>Contact and role information for people involved in |
|
62 |
the research project.</doc:summary> |
|
63 |
</xs:appinfo> |
|
64 |
</xs:annotation> |
|
65 |
<xs:complexType> |
|
66 |
<xs:complexContent> |
|
67 |
<xs:extension base="rp:ResponsibleParty"> |
|
68 |
<xs:sequence maxOccurs="unbounded"> |
|
69 |
<xs:element name="role" type="rp:RoleType"> |
|
70 |
<xs:annotation> |
|
71 |
<xs:appinfo> |
|
72 |
<doc:tooltip>Role</doc:tooltip> |
|
73 |
<doc:summary>Role information for people involved in |
|
74 |
the research project.</doc:summary> |
|
75 |
</xs:appinfo> |
|
76 |
</xs:annotation> |
|
77 |
</xs:element> |
|
78 |
</xs:sequence> |
|
79 |
</xs:extension> |
|
80 |
</xs:complexContent> |
|
81 |
</xs:complexType> |
|
82 |
</xs:element> |
|
83 |
<xs:element name="abstract" type="txt:TextType" minOccurs="0"> |
|
84 |
<xs:annotation> |
|
85 |
<xs:appinfo> |
|
86 |
<doc:tooltip>Abstract</doc:tooltip> |
|
87 |
<doc:summary>Project Abstract.</doc:summary> |
|
88 |
</xs:appinfo> |
|
89 |
</xs:annotation> |
|
90 |
</xs:element> |
|
91 |
<xs:element name="funding" type="txt:TextType" minOccurs="0"> |
|
92 |
<xs:annotation> |
|
93 |
<xs:appinfo> |
|
94 |
<doc:tooltip>Funding</doc:tooltip> |
|
95 |
<doc:summary>Funding information.</doc:summary> |
|
96 |
</xs:appinfo> |
|
97 |
</xs:annotation> |
|
98 |
</xs:element> |
|
99 |
<xs:element name="studyAreaDescription" minOccurs="0"> |
|
100 |
<xs:annotation> |
|
101 |
<xs:appinfo> |
|
102 |
<doc:tooltip>Description of the study area.</doc:tooltip> |
|
103 |
<doc:summary>Description of the physical area associated with the |
|
104 |
research project, potentially including coverage, climate, |
|
105 |
geology, disturbances, etc.</doc:summary>
|
106 |
</xs:appinfo> |
|
107 |
</xs:annotation> |
|
108 |
<xs:complexType> |
|
109 |
<xs:choice maxOccurs="unbounded"> |
|
110 |
<xs:element name="descriptor"> |
|
111 |
<xs:annotation> |
|
112 |
<xs:appinfo> |
|
113 |
<doc:tooltip>Non-coverage characteristics of the study |
|
114 |
area</doc:tooltip> |
|
115 |
<doc:summary>Description of non-coverage characteristics of |
|
116 |
the study area such as climate, geology, |
|
117 |
disturbances</doc:summary> |
|
118 |
</xs:appinfo> |
|
119 |
</xs:annotation> |
|
120 |
<xs:complexType> |
|
121 |
<xs:sequence maxOccurs="unbounded"> |
|
122 |
<xs:element name="descriptorValue" maxOccurs="unbounded"> |
|
123 |
<xs:annotation> |
|
124 |
<xs:appinfo> |
|
125 |
<doc:tooltip>Description of some aspect of the study |
|
126 |
area.</doc:tooltip> |
|
127 |
<doc:summary>Description of some aspect of the study |
|
128 |
area.</doc:summary> |
|
129 |
</xs:appinfo> |
|
130 |
</xs:annotation> |
|
131 |
<xs:complexType> |
|
132 |
<xs:simpleContent> |
|
133 |
<xs:extension base="xs:string"> |
|
134 |
<xs:attribute name="name_or_id" type="xs:string" use="optional"> |
|
135 |
<xs:annotation> |
|
136 |
<xs:appinfo> |
|
137 |
<doc:tooltip>The name or ID of a descriptor |
|
138 |
value.</doc:tooltip> |
|
139 |
<doc:summary>The name part of a name/value |
|
140 |
pair of a descriptor; or ID portion of a |
|
141 |
classification, if applicable.</doc:summary> |
|
142 |
</xs:appinfo> |
|
143 |
</xs:annotation> |
|
144 |
</xs:attribute> |
|
145 |
</xs:extension> |
|
146 |
</xs:simpleContent> |
|
147 |
</xs:complexType> |
|
148 |
</xs:element> |
Also available in: Unified diff
Moved pre-BIEN 3 files into _archive folder