<?php
#############################################################################
# Driver file for taiper2flatClient, a simple script to 
# consume taiper web services and save the results in a delimited file,
# one record per line.  The current delimiter is comma ',' but
# that can be changed by changing the $seperator variable located
# in configurableParams.php.
#
# Command line usage:
#   php taiper2flat.php
#
# dependencies:
# php 5+
# pear
# HTTP_Request
# I had to install pear and HTTP_Request before it would work (on Ubuntu):
#   sudo apt-get install php-http-request
# This should install all dependencies, but just in case, the full
# dependency list is:
#   php-net-socket 
#   php-net-url
#   php-pear
#   php-http-request
#############################################################################

require_once('HTTP/Request.php'); // pear package
require_once('tapirRequestTemplate.php'); 
require_once('getAllConcepts.php'); 

// $url is the endpoint every request is posted to and $seperator is the
// column delimiter of the flat file; neither is assigned in this file.
global $url, $seperator;

// Files this script writes, relative to the working directory.
$flat_filename       = "specieslink.specimens.csv";  // one record per line
$record_num_filename = "recordnum.dat";              // position to resume from
$error_log_filename  = "error.log";                  // appended to on every failure

// Characters 0-31 that get stripped from each response before it is parsed.
// Tab, line feed and carriage return are kept.
$ctrl_chars = array();
for ($ordinal = 0; $ordinal <= 31; $ordinal++) {
    $candidate = chr($ordinal);
    if ($candidate === "\t" || $candidate === "\n" || $candidate === "\r") {
        continue;
    }
    $ctrl_chars[] = $candidate;
}

// Fetch the concepts the service supports. The keys of this array become the
// column names of the flat file and are looked up in every returned record.
// Anything other than an array is treated as a fatal start-up failure.
$supportedConcepts = getAllConcepts();
if (!is_array($supportedConcepts)) {
    file_put_contents(
        $error_log_filename,
        "Unable to retrieve supported concepts from service, failing.\n",
        FILE_APPEND
    );
    // Exit with a non-zero status so a calling shell or cron job can tell
    // this failure apart from a normal, completed run.
    exit(1);
}

// Failure counter; the main loop resets it to 0 after every page that was
// processed successfully.
$successive_failures = 0;

// Record to begin the first request at (ordinal position, not ID). When the
// position file exists, its content is used so pulling can pick up from there.
$start_record = 0;
if (file_exists($record_num_filename)) {
    $saved_position = file_get_contents($record_num_filename);
    // Ignore an unreadable, empty or non-numeric file rather than sending
    // garbage to the service; surrounding whitespace/newlines are tolerated.
    if ($saved_position !== false && is_numeric(trim($saved_position))) {
        $start_record = max(0, (int) trim($saved_position));
    }
}

$request_limit = 1000;		// Number of records per request; cannot exceed 1000

// According to the TAPIR docs, the estimate from the service
// is an upper bound on the number of records.  Start out high.
$estimated_max_from_service = 4000000;

// The first time the flat file is created, start it with a header row made of
// the concept names joined by the configured separator. An existing file is
// left untouched so that a resumed run keeps appending records to it.
if (!file_exists($flat_filename)) {
    // implode() places the separator only between names, so nothing has to be
    // chopped off afterwards and separators of any length stay intact.
    $file_header = implode($seperator, array_keys($supportedConcepts)) . "\n";

    if (file_put_contents($flat_filename, $file_header, FILE_APPEND) === false) {
        // Without an output file there is nothing useful the run can do.
        file_put_contents(
            $error_log_filename,
            "Unable to write header to " . $flat_filename . ", failing.\n",
            FILE_APPEND
        );
        exit(1);
    }
}

// ---------------------------------------------------------------------------
// Main pull loop.
//
// Requests one page of records at a time starting at $start_record, appends
// the records to the flat file, stores the position of the next page in
// $record_num_filename and repeats until the service has no more records.
// A failed request (transport error, non-200 status, unparsable body, <error>
// element, missing <summary>) is logged to $error_log_filename and the same
// request is repeated after a pause; there is no limit on the number of
// retries.
// ---------------------------------------------------------------------------

$finished = false;
$itrNum = 0;

// Reason phrases written to the error log next to a non-200 status code.
$http_status_labels = array(
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Found',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Timeout',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Long',
    415 => 'Unsupported Media Type',
    416 => 'Requested Range Not Satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Timeout',
    505 => 'HTTP Version Not Supported',
);

// Longest pause, in seconds, before a failed request is repeated.
$max_retry_delay = 60;

// Column order of every output row; identical for all pages.
$concept_names = array_keys($supportedConcepts);

$http_request = new HTTP_Request();

while (!$finished && $start_record < $estimated_max_from_service) {
    $itrNum++;

    // Pause before repeating a failed request so an unreachable service is
    // not hammered and the error log does not grow at CPU speed. The pause
    // grows by 5 seconds per consecutive failure up to $max_retry_delay.
    if ($successive_failures > 0) {
        sleep(min($max_retry_delay, 5 * $successive_failures));
    }

    $body = buildRequest($start_record, $request_limit, $supportedConcepts);

    $http_request->setMethod('POST');
    $http_request->addHeader('Content-Type', 'text/xml');
    $http_request->addRawPostData($body);
    $http_request->setURL($url);
    $http_request->_timeout = 300;
    // NOTE(review): HTTP_Request documents readTimeout as
    // array(seconds, microseconds); a bare integer is not in that shape.
    $http_request->_readTimeout = array(300, 0);

    // This can be used to see the entire request
    #$raw_request = $http_request->_buildRequest();
    #echo "\n\n" . $raw_request;

    // A transport-level failure is reported through the return value; in that
    // case there is no trustworthy status code or body to look at.
    $send_result = $http_request->sendRequest();
    if (is_object($send_result) && $send_result instanceof PEAR_Error) {
        file_put_contents(
            $error_log_filename,
            "Request could not be completed: " . $send_result->getMessage() . "\n",
            FILE_APPEND
        );
        $successive_failures++;

        // Repeat the same request
        continue;
    }

    $response = $http_request->getResponseBody();
    $code = $http_request->getResponseCode();

    if ($code != 200) { // 200 = OK
        $label = 'Unknown Error';
        if (isset($http_status_labels[(int) $code])) {
            $label = $http_status_labels[(int) $code];
        }

        file_put_contents(
            $error_log_filename,
            "Service responded with HTTP ".$code." code: ".$label."\n",
            FILE_APPEND
        );
        $successive_failures++;

        // Repeat the same request
        continue;
    }

    // Drop the control characters listed in $ctrl_chars and let filter_var
    // encode high (> 127) characters before the body is handed to the parser.
    $response = filter_var(str_replace($ctrl_chars, "", $response),
        FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_HIGH);

    // An empty body cannot be parsed (and makes loadXML() throw on PHP 8), so
    // it is handled explicitly together with malformed XML.
    $xmlDoc = new DOMDocument();
    if (!is_string($response) || $response === '' || !$xmlDoc->loadXML($response)) {
        file_put_contents(
            $error_log_filename,
            "Response body is empty or is not well-formed XML.\n",
            FILE_APPEND
        );
        $successive_failures++;

        // Repeat the same request
        continue;
    }

    // Any <error> element means the page cannot be trusted: log them all.
    $errors = $xmlDoc->getElementsByTagName("error");
    if ($errors->length > 0) {
        $error_lines = '';
        foreach ($errors as $error) {
            $error_lines .= $error->nodeValue . "\n";
        }
        file_put_contents($error_log_filename, $error_lines, FILE_APPEND);
        $successive_failures++;

        // Repeat the same request
        continue;
    }

    $summary = $xmlDoc->getElementsByTagName("summary");
    if ($summary->length == 0) {
        file_put_contents(
            $error_log_filename,
            "No summary node, assuming there's a missed error.\n",
            FILE_APPEND
        );
        $successive_failures++;

        // Repeat the same request
        continue;
    }

    $summary_node = $summary->item(0);
    $next_attr = trim($summary_node->getAttribute("next"));
    $total_attr = trim($summary_node->getAttribute("totalMatched"));

    // Keep the previous estimate when the service does not report a total;
    // an empty value here would end the loop early.
    if (is_numeric($total_attr)) {
        $estimated_max_from_service = (int) $total_attr;
    }

    // Do not record progress for a page that could not be written out.
    $flat_file = fopen($flat_filename, "a");
    if ($flat_file === false) {
        file_put_contents(
            $error_log_filename,
            "Unable to open " . $flat_filename . " for appending, failing.\n",
            FILE_APPEND
        );
        exit(1);
    }

    // One CSV row per <record>; a concept missing from a record becomes an
    // empty field so that all rows keep the same column layout.
    $records = $xmlDoc->getElementsByTagName("record");
    foreach ($records as $record) {
        $fields = array();
        foreach ($concept_names as $key) {
            $element = $record->getElementsByTagName($key);
            $fields[] = $element->length > 0 ? $element->item(0)->nodeValue : "";
        }
        fputcsv($flat_file, $fields, $seperator);
    }
    fclose($flat_file);

    $successive_failures = 0;

    // Work out where the next page starts. Without a usable "next" attribute
    // the service is taken to be out of records; the position then advances
    // past what was just written so a later run does not start over from 0.
    $records_returned = $records->length;
    if (is_numeric($next_attr)) {
        $next_record = (int) $next_attr;
        $has_more = true;
    } else {
        $next_record = (int) $start_record + $records_returned;
        $has_more = false;
    }

    file_put_contents($record_num_filename, $next_record);
    $start_record = $next_record;

    // If number of records is less than request_limit, it means
    // the service is out of records.
    $finished = !$has_more || ($request_limit > $records_returned);

    if ($itrNum == 1) {
        print "Extimated number of records:  $estimated_max_from_service\n";
    } elseif ($itrNum % 10 == 0) {
        print "Pulled $start_record records out of $estimated_max_from_service ".
              "estimated total records.\n";
    }

}

?>

