/bin/review - BIEN 3 - NCEAS Projects

root/bin/review @ 7524

       #!/usr/bin/env python
       # Converts a map spreadsheet to a human-readable (but not machine-usable) form
       # Usage: self [col_num...] <in_map >out_map
       # col_num: 0-based indexes of the columns to clean up (default: 0 and 1)
       import csv
       import re
       import sys
       def sub_nested(regex, repl, str_):
           '''Replaces regex with repl in str_ repeatedly until nothing matches.

           Each pass substitutes every current match; passes continue until one
           makes no replacement, and the resulting string is returned. This lets
           a pattern that only matches an un-nested construct strip nested
           occurrences one layer per pass.
           '''
           changed = True
           while changed:
               str_, hits = re.subn(regex, repl, str_)
               changed = hits != 0
           return str_
       def cleanup(xpath):
           '''Abbreviates a map XPath into a shorter form meant for human review.

           Runs a fixed sequence of regex removals over xpath; the order matters,
           since each step works on the output of the previous one. Returns the
           shortened string, with "/" prepended when the pointer removal or the
           leading-path removal reported a match.
           '''
           # Steps applied repeatedly (via sub_nested) until they stop matching
           repeated_steps = (
               # Remove attrs
               (r':\[[^\[\]]*?\]', r''),
               # Remove keys except last
               (r':?\[[^\[\]]*?\](?=.*\[[^\[\]]*?\].*?)', r''),
               # Remove lookahead assertions
               (r'\((/[^\)]*?)\)(?=/)', r'\1'),
           )
           for regex, repl in repeated_steps:
               xpath = sub_nested(regex, repl, xpath)

           # Remove pointers
           xpath, ptr_ct = re.subn(r'^.*->', r'', xpath)
           # Remove part of path before first key list, XML function, or path end
           # Leave enough to include the table of a user-defined value
           # NOTE(review): this pattern can match the empty string, in which case
           # the count is still nonzero and the path is flagged as truncated even
           # though nothing was removed -- confirm this is intended
           xpath, prefix_ct = re.subn(
               r'^(?:/(?!_)[\w*]+)*(?=(?:/(?!_)[\w*]+){2}(?:\[|/_|$))', r'', xpath)
           truncated = ptr_ct > 0 or prefix_ct > 0

           # Steps applied in a single pass each
           single_steps = (
               # Remove XML functions other than unit conversions
               (r'/_(?!\w+_to_)\w+/\w+', r''),
               # Remove backward (child-to-parent) pointer's target ID attr
               (r'\[[\w*]+\]|(?<=\[)[\w*]+,', r''),
               # Remove negative keys
               (r',?!(?:[\w*]+/)*@?[\w*]+', r''),
               # Remove path before key
               (r'(?:[\w*]+/)*(@?[\w*]+)(?==)', r'\1'),
           )
           for regex, repl in single_steps:
               xpath = re.sub(regex, repl, xpath)

           # Prepend / to show truncation
           if truncated:
               return '/'+xpath
           return xpath
       def main():
           '''Reads a CSV map from stdin and writes the cleaned-up map to stdout.

           Command-line arguments are the 0-based indexes of the columns to pass
           through cleanup(); with no arguments, columns 0 and 1 are used. The
           first row is treated as a header and copied through unchanged. Empty
           input produces empty output.

           Raises ValueError if an argument is not an integer, and IndexError if
           a data row has no cell at one of the selected column indexes.
           '''
           # Build a real list: it is re-iterated for every row, and a lazy map()
           # object (Python 3) would be exhausted after the first row
           col_nums = [int(arg) for arg in sys.argv[1:]] # 0-based
           if not col_nums:
               col_nums = [0, 1]

           # Convert map
           reader = csv.reader(sys.stdin)
           writer = csv.writer(sys.stdout)
           # Builtin next() works on both Python 2.6+ and 3 (reader.next() is
           # Python 2 only); the default avoids StopIteration on empty input
           header = next(reader, None)
           if header is None:
               return # no header row: nothing to convert
           writer.writerow(header)
           for row in reader:
               for i in col_nums:
                   row[i] = cleanup(row[i])
               writer.writerow(row)
       # Script entry point: runs the conversion unconditionally when this file
       # is executed (there is no __main__ guard, so importing it also runs it)
       main()

(62-62/80)

Project

General

Profile