Project

General

Profile

« Previous | Next » 

Revision 1666

bin/tapir/tapir2flat.php: Fixed XML parsing to strip control chars so DOMDocument::loadXML() wouldn't complain about "PCDATA invalid Char value 8 in Entity", etc.

View differences:

bin/tapir/tapir2flat.php
34 34
$record_num_filename = "recordnum.dat";
35 35
$error_log_filename = "error.log";
36 36

  
37
$ctrl_chars = array_flip(array_map("chr", range(0, 31)));
38
foreach (array("\t", "\n", "\r") as $whitespace)
39
    unset($ctrl_chars[$whitespace]);
40
$ctrl_chars = array_keys($ctrl_chars);
41

  
37 42
$supportedConcepts = getAllConcepts();
38 43
if(!is_array($supportedConcepts)){
39 44
  $error_log = fopen($error_log_filename,"a");
......
154 159
       continue;
155 160
    }
156 161

  
157
    #Weird encoding bug - need to remove the ^F and ^L characters so
158
    #that the xml parser won't choke.
162
    $response = filter_var(str_replace($ctrl_chars, "", $response),
163
        FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_HIGH);
159 164
    $xmlDoc = new DOMDocument();
160
    $response = preg_replace("/\x06/","6",$response);
161
    $response = preg_replace("/\x0C/","12",$response);
162 165
    $xmlDoc->loadXML($response);
163 166

  
164 167
    $errors = $xmlDoc->getElementsByTagName("error");

Also available in: Unified diff