Project

General

Profile

« Previous | Next » 

Revision 5149

tnrs.py: Added encode(), decode(), decode_for_tsv(), and TnrsOutputStream to handle escaping TNRS-invalid characters

View differences:

lib/tnrs.py
5 5
import time
6 6
import urllib2
7 7

  
8
import csvs
8 9
import exc
9 10
import profiling
10 11
import streams
......
59 60
def gwt_encode(str_): # returns str_ run through strings.json_encode(), then strings.esc_quotes()
60 61
    return strings.esc_quotes(strings.json_encode(str_), '|', quote_esc='\!') # '|' is presumably the quote char to escape, with '\!' as its escape -- confirm against strings.esc_quotes
61 62

  
63
def make_spliced_decode_map(decode_map): # turns (from_, to) pairs into (regexp, replacement) pairs
64
    return [(r'(?: |(?<=\t)|^)'+re.escape(from_.strip())+r'(?: |(?=\t)|$)',
65
        strings.regexp_repl_esc(to)) for from_, to in decode_map] # regexp: stripped from_, bounded on each side by a space (consumed), a tab (not consumed), or ^/$
66

  
67
encode_map = [ # (char, replacement) pairs; every replacement is a space-padded token starting with '!'
68
    ('!', ' !exc '), # escape char; listed first, presumably so the '!' added by later entries isn't re-escaped
69
    ('\t', ' !tab '),
70
    ('\n', ' !nl '),
71
    ('\r', ' !cr '),
72
    ('%', ' !pct '),
73
    ('&', ' !amp '),
74
    (';', ' !sem '),
75
    ('\\', ' !bsl '),
76
]
77
decode_map = strings.flip_map(encode_map) # presumably the same pairs with from/to swapped -- confirm against strings.flip_map
78
spliced_decode_map = make_spliced_decode_map(decode_map) # regexp form of decode_map, used by decode()
79

  
80
def encode(str_): return strings.replace_all(encode_map, str_) # applies the encode_map replacements to str_
81

  
82
def decode(str_): return strings.replace_all_re(spliced_decode_map, str_) # applies the spliced_decode_map regexps to str_. NOTE(review): if entries are applied in list order, '!exc' is decoded first, so encode('!tab') may decode to a tab instead of '!tab' -- confirm
83

  
84
decode_for_tsv_map = make_spliced_decode_map([(from_, strings.replace_all( # like spliced_decode_map, but each decoded value is first passed through
85
    csvs.tsv_encode_map, to)) for from_, to in decode_map]) # csvs.tsv_encode_map (presumably so it stays valid inside a TSV field -- confirm)
86

  
87
def decode_for_tsv(str_): # same as decode(), but uses decode_for_tsv_map instead of spliced_decode_map
88
    return strings.replace_all_re(decode_for_tsv_map, str_)
89

  
90
class TnrsOutputStream(streams.FilterStream):
91
    '''Decodes a TNRS response whose names were encoded with encode()'''
92
    def __init__(self, stream): # stream: the underlying stream handed to streams.FilterStream
93
        streams.FilterStream.__init__(self, decode_for_tsv, stream) # decode_for_tsv is passed as the filter function (presumably applied to the stream's data -- confirm against streams.FilterStream)
94

  
62 95
def parse_response(name, pattern, str_, response, response_info):
63 96
    match = re.match(pattern, str_)
64 97
    if not match:

Also available in: Unified diff