Project

General

Profile

1
<?php
2

    
3
# Node names that isIgnoreType() reports as ignorable. Each name becomes a key
# of the table with the value true, so membership is a plain key lookup.
# Names are written with the "xsd:" prefix; callers rewrite "xs:" to "xsd:"
# before looking a name up.
$ignoreTypes = array_fill_keys(
  array(
    # doc:* names
    "doc:description", "doc:example", "doc:moduleDescription",
    "doc:moduleDocs", "doc:moduleName", "doc:recommendedUsage",
    "doc:standAlone", "doc:summary", "doc:tooltip",
    # xsd:* names
    "xsd:annotation", "xsd:appinfo", "xsd:documentation",
    "xsd:import", "xsd:include", "xsd:schema",
    # unprefixed names
    "AdditionalAbbreviations", "BacteriaStatus", "BioCodeStatus",
    "BotanyStatus", "li", "p", "para", "PreferredAbbreviation",
    "section", "Specification", "SuffixAlgae", "SuffixAnimalia",
    "SuffixBacteriae", "SuffixFungi", "SuffixPlantae", "title",
    "ul", "ZoologyStatus"
  ),
  true
);
38
            
39
# Report whether a node name is listed in the global $ignoreTypes table.
#
# $nodeName - node name to look up.
# Returns true when $nodeName is a key of $ignoreTypes holding a truthy value,
# and false otherwise (including when the key is absent).
function isIgnoreType($nodeName) {
  global $ignoreTypes;
  # empty() is used rather than a bare index so that a name missing from the
  # table yields false instead of an undefined index/array key diagnostic
  # and a null result.
  return !empty($ignoreTypes[$nodeName]);
}
43

    
44
# Schema files handed to DOMDocument::load(), in the order they are scanned.
$files = array(
  # eml-*
  'eml-access.xsd', 'eml-coverage.xsd', 'eml-documentation.xsd',
  'eml-literature.xsd', 'eml-party.xsd', 'eml-project.xsd',
  'eml-resource.xsd', 'eml-text.xsd',
  # tcs / tdwg
  'tcsv101.xsd',
  'tdwg_basetypes.xsd', 'tdwg_dw_element.xsd', 'tdwg_dw_geospatial.xsd',
  # veg*
  'veg-misc.xsd', 'veg-organismobservation.xsd', 'veg-plotobservation.xsd',
  'veg.xsd', 'veg-plot.xsd'
);
62

    
63

    
64
# Tell whether any descendant element of $node has a name that is not on the
# ignore list.
#
# $node - object exposing getElementsByTagName(); each returned item must
#         expose a nodeName property.
# Returns true at the first descendant whose name (with "xs:" rewritten to
# "xsd:") isIgnoreType() does not accept, and false when every descendant is
# ignorable or there are no descendants.
function hasImportantNodes($node) {
  $descendants = $node->getElementsByTagName("*");
  foreach($descendants as $descendant) {
    $normalizedName = preg_replace("/xs:/","xsd:",$descendant->nodeName);
    if(isIgnoreType($normalizedName)) {
      continue;
    }
    return true;
  }
  return false;
}
74

    
75
# Type names treated as primitive. Keys are type names without a namespace
# prefix (lookups strip everything up to the last ':' before testing the key).
# Values are free-form descriptions; within this file only the keys are
# consulted.
$primitiveTypes = array(
  # Types described with a range, an enumeration or a union
  'DEFCATEGORIE'                       => 'enumerated string',
  'decimalLatitudeDataType'            => 'xs:double [-90,90]',
  'decimalLongitudeDataType'           => 'xs:double [-180,180]',
  'spatialFitDataType'                 => 'xs:double (0 or >1 or undefined)',
  'PERCENT'                            => 'xsd:decimal [0,100]',
  'RoleType'                           => 'xs:string (union of enumerated string & unenumerated string)',
  'TaxonomicRankEnum'                  => 'xs:Name (union of several enumerated string types for one big list of enum values)',
  'TINYINT'                            => 'xsd:decimal [-128,128]',
  'yearDate'                           => 'xs:gYear xs:date (union of)',

  # Built-in schema types
  'anyType'                            => 'xsd:anyType',
  'InlineType'                         => 'xs:anyType',
  'date'                               => 'xsd:date',
  'anyURI'                             => 'xs:anyURI',
  'decimal'                            => 'xsd:decimal',
  'gYear'                              => 'xsd:gYear',
  'string'                             => 'xs:string',
  'positiveInteger'                    => 'xs:positiveInteger',
  'time'                               => 'xs:time',
  'double'                             => 'xs:double',
  'integer'                            => 'xs:integer',
  'Name'                               => 'xs:Name',
  'NMTOKEN'                            => 'xs:NMTOKEN',

  # Named types mapped onto a single built-in type
  'DateTimeISO'                        => 'xs:string',
  'dayOfYearDataType'                  => 'xs:integer',
  'DescriptorType'                     => 'xs:string',
  'FunctionType'                       => 'xs:string',
  'GRingType'                          => 'xs:anyType',
  'IDType'                             => 'xs:string',
  'KeyTypeCode'                        => 'xs:string',
  'MEDIUMINT'                          => 'xs:decimal',
  'NomenclaturalCodesEnum'             => 'xsd:Name',
  'NomenclaturalTypeStatusOfUnitsEnum' => 'xsd:Name',
  'positiveDouble'                     => 'xs:double',
  'probabilityType'                    => 'xs:double',
  'ScopeType'                          => 'xs:string',
  'SMALLINT'                           => 'xs:decimal',
  'STRATUMINDEX'                       => 'xs:string',
  'SystemType'                         => 'xs:string',
  'TaxonomicRankAboveSuperfamilyEnum'  => 'xsd:Name',
  'TaxonomicRankBelowSubspeciesEnum'   => 'xsd:Name',
  'TaxonomicRankCultivatedPlants'      => 'xsd:Name',
  'TaxonomicRankFamilyGroupEnum'       => 'xsd:Name',
  'TaxonomicRankFamilySubdivisionEnum' => 'xsd:Name',
  'TaxonomicRankGenusGroupEnum'        => 'xsd:Name',
  'TaxonomicRankGenusSubdivisionEnum'  => 'xsd:Name',
  'TaxonomicRankSpeciesGroupEnum'      => 'xsd:Name',
);
123

    
124
# Names of named complexType/simpleType definitions whose typed child elements
# all refer to primitive types (or to the type itself). A name goes into the
# "variable length" table when any descendant carries maxOccurs="unbounded"
# or a maxOccurs greater than 1, and into the plain table otherwise.
$primitiveGroupings = array();
$primitiveGroupingsVariableLength = array();

#Find all primitive groupings and primitive groupings of variable length.
foreach($files as $file) {
  $xmlDoc = new DOMDocument();
  if(!$xmlDoc->load($file)) {
    # load() returns false when the file cannot be read or parsed. Say which
    # file was skipped and carry on with the remaining schemas.
    trigger_error("Could not load schema file: " . $file, E_USER_WARNING);
    continue;
  }

  foreach($xmlDoc->getElementsByTagName("*") as $node) {
    $nodeName = preg_replace("/xs:/","xsd:",$node->nodeName);
    if(isIgnoreType($nodeName)) {
      continue;
    }
    if($nodeName !== 'xsd:complexType' && $nodeName !== 'xsd:simpleType') {
      continue;
    }

    $actualName = preg_replace("/.*:/","",$node->getAttribute('name'));

    #Empty string is not a type, and a known primitive needs no evaluation
    if($actualName === '' || array_key_exists($actualName,$primitiveTypes)) {
      continue;
    }

    # Every typed child element must be primitive or refer back to this type.
    $isPrimitiveGroup = true;
    foreach($node->getElementsByTagName("element") as $element) {
      $elementType = preg_replace("/.*:/","",$element->getAttribute('type'));
      if($elementType !== '' && $elementType !== $actualName &&
         !array_key_exists($elementType,$primitiveTypes)) {
        $isPrimitiveGroup = false;
        break;
      }
    }
    if(!$isPrimitiveGroup) {
      continue;
    }

    #maxOccurs can show up in several child node types, so just look for it in all of them
    $variable = false;
    foreach($node->getElementsByTagName("*") as $element) {
      $maxOccurs = $element->getAttribute('maxOccurs');
      # is_numeric() guards the comparison: comparing a non-numeric string
      # with an integer gives different answers on PHP 7 and PHP 8.
      if($maxOccurs === 'unbounded' || (is_numeric($maxOccurs) && $maxOccurs > 1)) {
        $variable = true;
        break;
      }
    }

    if($variable) {
      $primitiveGroupingsVariableLength[$actualName] = 1;
    } else {
      $primitiveGroupings[$actualName] = 1;
    }
  }
}
184

    
185
# Report the names collected in the two grouping tables, one name per line,
# each list under its own heading and separated by two blank lines.
print "primitive groupings:\n";
foreach($primitiveGroupings as $groupName => $unused) {
  print $groupName . "\n";
}
print "\n\n";

print "primitive groupings variable length:\n";
foreach($primitiveGroupingsVariableLength as $groupName => $unused) {
  print $groupName . "\n";
}
195

    
196
# Check whether a name is a key in any of the given lookup arrays.
#
# $name  - key to search for.
# $tiers - array of arrays; each inner array is probed with array_key_exists().
# Returns true as soon as one inner array has $name as a key, false when none
# does (including when $tiers is empty).
function alreadyFound($name,$tiers) {
  $hit = false;
  foreach($tiers as $lookup) {
    $hit = array_key_exists($name,$lookup);
    if($hit) {
      break;
    }
  }
  return $hit;
}
204

    
205
# Collect the named complexType/simpleType definitions that belong to the next
# tier: those not present in any earlier tier whose typed child elements all
# refer either to the type itself or to a name found in an earlier tier.
#
# $previousTiers - array of lookup arrays keyed by type name (the earlier
#                  tiers); only the keys are consulted.
# Returns an array keyed by type name with the value 1 for each type found.
# Reads every file listed in the global $files; a file that fails to load is
# reported with a warning and skipped.
function findTierEntities($previousTiers) {
  global $files;
  $returnArray = array();
  foreach($files as $file) {
    $xmlDoc = new DOMDocument();
    if(!$xmlDoc->load($file)) {
      # load() returns false when the file cannot be read or parsed.
      trigger_error("Could not load schema file: " . $file, E_USER_WARNING);
      continue;
    }

    foreach($xmlDoc->getElementsByTagName("*") as $node) {
      $nodeName = preg_replace("/xs:/","xsd:",$node->nodeName);
      if(isIgnoreType($nodeName)) {
        continue;
      }
      if($nodeName !== 'xsd:complexType' && $nodeName !== 'xsd:simpleType') {
        continue;
      }

      $actualName = preg_replace("/.*:/","",$node->getAttribute('name'));

      #Empty string is not a type; names in lower tiers need no evaluation
      if($actualName === '' || alreadyFound($actualName,$previousTiers)) {
        continue;
      }

      # Every typed child element must already be known from an earlier tier
      # (a reference back to the type itself is allowed).
      $isInTier = true;
      foreach($node->getElementsByTagName("element") as $element) {
        $elementType = preg_replace("/.*:/","",$element->getAttribute('type'));
        if($elementType !== '' && $elementType !== $actualName &&
           !alreadyFound($elementType,$previousTiers)) {
          $isInTier = false;
          break;
        }
      }

      if($isInTier) {
        $returnArray[$actualName] = 1;
      }
    }
  }
  return $returnArray;
}
251

    
252
# Compute and print tiers 1 through 5. Each tier is searched against the
# primitive tables plus every tier found before it.
$prevTiers = array($primitiveTypes,$primitiveGroupings,$primitiveGroupingsVariableLength);
$tierResults = array();
for($tierNumber = 1; $tierNumber <= 5; $tierNumber++) {
  print "\n\n";
  $tierResults[$tierNumber] = findTierEntities($prevTiers);
  print "tier " . $tierNumber . " entities:\n";
  foreach($tierResults[$tierNumber] as $entityName => $unused) {
    print $entityName . "\n";
  }
  # The last tier is not searched against, so it is not appended.
  if($tierNumber < 5) {
    $prevTiers[] = $tierResults[$tierNumber];
  }
}

# Keep the per-tier variables available under their usual names.
$tier1 = $tierResults[1];
$tier2 = $tierResults[2];
$tier3 = $tierResults[3];
$tier4 = $tierResults[4];
$tier5 = $tierResults[5];

#5 is as deep as we get
# NOTE(review): the total below merges tiers 1-4 only; $tier5 is left out.
# Confirm whether that is intended before relying on the printed count.
$tot = array_merge($primitiveTypes,$primitiveGroupings,$primitiveGroupingsVariableLength,$tier1,$tier2,$tier3,$tier4);
print count($tot) . "\n";
295

    
296
?>
297

    
(3-3/7)