Project

General

Profile

1 73 aaronmk
# String manipulation
2
3 1295 aaronmk
import codecs
4 861 aaronmk
import re
5
6 1232 aaronmk
import util
7
8 4028 aaronmk
##### Sentinel values
9
10
class NonInternedStr(str):
    '''A str subclass whose instances are always separate objects, so two of
    them are never the same object under `is`, even when their contents are
    equal.'''
13
14
# Empty-string sentinel that is its own unique object (presumably meant to be
# detected with `is`, since it compares == to any empty string)
none_str = NonInternedStr()
# Whitespace-only sentinel: clone-safe, but must be compared with ==
isspace_none_str = '\r\n\t\v'
16 4028 aaronmk
17 1622 aaronmk
##### Parsing
18
19 4115 aaronmk
def raw_extra_len(str_):
    '''Returns how many more bytes than characters a string has, i.e. the byte
    length of to_raw_str(str_) minus the character length of str_. This is
    nonzero for Unicode strings containing multi-byte characters.'''
    byte_len = len(to_raw_str(str_))
    char_len = len(str_)
    return byte_len-char_len
24
25 3749 aaronmk
def concat(str0, str1, max_len):
    '''Concatenates two strings, ensuring that the combined byte length is no
    greater than the provided length limit.
    Only str0 is ever truncated; str1 is always appended whole, so the result
    can still exceed max_len when str1 alone does not fit.
    Note that if the combined length is longer than max_len, the truncated
    length may sometimes be shorter than max_len if there are multi-byte
    characters after str0's truncation point. Thus, you *cannot determine if the
    string was truncated* by checking if the new length equals max_len.
    @param str0 The leading string, truncated at its end as needed
    @param str1 The trailing string, kept intact
    @param max_len The byte length limit for the result
    @return The (possibly truncated) str0 followed by str1
    '''
    # Use raw_extra_len() because Unicode characters can be multi-byte, and
    # length limits often apply to the byte length, not the character length.
    max_len -= raw_extra_len(str0)+raw_extra_len(str1)
    # Clamp at 0: a negative slice end would count back from the end of str0
    # and keep most of it, instead of dropping str0 entirely
    keep_len = max(max_len-len(str1), 0)
    return str0[:keep_len]+str1
37 2932 aaronmk
38 1622 aaronmk
def split(sep, str_):
    '''Like str_.split(sep), except that an empty str_ produces [] instead of
    a list holding one empty string.'''
    if str_ != '': return str_.split(sep)
    return []
42
43 4937 aaronmk
def remove_prefix(prefix, str_, removed_ref=None, require=False):
    '''
    Strips a prefix from the start of a string, if it is there.
    @param prefix The prefix to look for
    @param str_ The string to strip it from
    @param removed_ref Optional one-element list; its [0] is set to whether the
        prefix was found
    @param require Whether to raise an Exception when the prefix is missing
    @return str_ without the prefix, or str_ unchanged if it had no such prefix
    '''
    if removed_ref is None: removed_ref = [False]
    
    removed_ref[0] = str_.startswith(prefix)
    if removed_ref[0]: return str_[len(prefix):]
    elif require: raise Exception(urepr(str_)+' needs '+urepr(prefix)+' prefix')
    else: return str_
50
51 1680 aaronmk
def remove_prefixes(prefixes, str_):
    '''Applies remove_prefix() once for each prefix, in the order given, each
    time to the result of the previous removal.'''
    result = str_
    for prefix in prefixes:
        result = remove_prefix(prefix, result)
    return result
54
55
def with_prefixes(prefixes, str_):
    '''@return A generator yielding str_ prepended with each prefix, in order'''
    return (prefix+str_ for prefix in prefixes)
56
57 4937 aaronmk
def remove_suffix(suffix, str_, removed_ref=None, require=False):
    '''
    Strips a suffix from the end of a string, if it is there.
    @param suffix The suffix to look for
    @param str_ The string to strip it from
    @param removed_ref Optional one-element list; its [0] is set to whether the
        suffix was found
    @param require Whether to raise an Exception when the suffix is missing
    @return str_ without the suffix, or str_ unchanged if it had no such suffix
    '''
    if removed_ref is None: removed_ref = [False]
    
    removed_ref[0] = str_.endswith(suffix)
    # Slice to an explicit end index: str_[:-len(suffix)] would be str_[:0]
    # (the empty string) for an empty suffix
    if removed_ref[0]: return str_[:len(str_)-len(suffix)]
    elif require: raise Exception(urepr(str_)+' needs '+urepr(suffix)+' suffix')
    else: return str_
64
65 1959 aaronmk
def contains_any(haystack, needles):
    '''Whether at least one of the needles occurs somewhere in haystack'''
    return any(haystack.find(needle) >= 0 for needle in needles)
69
70 1622 aaronmk
def overlaps(str0, str1):
    '''Whether either string occurs inside the other'''
    if str0.find(str1) >= 0: return True
    return str1.find(str0) >= 0
71
72 5147 aaronmk
def flip_map(map_):
    '''
    Swaps the two members of every pair, giving the reverse mapping.
    For use with replace_all(), replace_all_re().
    @param map_ [(from_, to), ...]
    @return [(to, from_), ...]
    '''
    flipped = []
    for from_, to in map_: flipped.append((to, from_))
    return flipped
79
80
def replace_all(map_, str_):
    '''
    Applies each literal replacement to the string, in map order. Later
    replacements see the output of earlier ones.
    @param map_ [(from_, to), ...]
    @return The string after all replacements
    '''
    result = str_
    for pair in map_:
        old, new = pair
        result = result.replace(old, new)
    return result
86
87
def replace_all_re(map_, str_):
    '''
    Applies each regular-expression substitution to the string, in map order.
    Later substitutions see the output of earlier ones.
    @param map_ [(from_, to), ...]
    @return The string after all substitutions
    '''
    result = str_
    for pair in map_:
        pattern, repl = pair
        result = re.sub(pattern, repl, result)
    return result
93
94 2761 aaronmk
##### Escaping
95
96 5142 aaronmk
def esc_quotes(str_, quote='"', esc='\\', quote_esc=None):
    '''
    Escapes the quote character (and the escape character itself) in a string.
    @param str_ The string to escape
    @param quote The quote character to escape
    @param esc The escape character. It is doubled wherever it occurs, unless
        it is the same as quote.
    @param quote_esc What each quote is replaced with. Defaults to esc+quote.
    @return The escaped string, without surrounding quotes
    '''
    if quote_esc is None: quote_esc = esc+quote
    
    # Escape the escape char first, so the escapes added below are not doubled
    if esc != quote: str_ = str_.replace(esc, esc+esc)
    str_ = str_.replace(quote, quote_esc)
    return str_
102
103 5143 aaronmk
def json_encode(str_):
    '''Escapes backslashes and double quotes in str_ using esc_quotes() with
    its default backslash escape. Does not add surrounding quotes.'''
    return esc_quotes(str_, quote='"')
104
105 2761 aaronmk
def esc_for_mogrify(query):
    '''Escapes a query right before being passed to a mogrifying function, by
    doubling every % in it.'''
    escaped = query.replace('%', '%%')
    return escaped
108
109 5148 aaronmk
def regexp_repl_esc(str_):
    '''
    Wraps a string in a one-argument function that ignores its argument and
    returns the string verbatim.
    NOTE(review): presumably meant as the repl callable of re.sub()-style
    functions, so the string is inserted literally -- confirm against callers.
    '''
    def repl(match): return str_
    return repl
110
111 1401 aaronmk
##### Unicode
112
113 2882 aaronmk
def to_raw_str(str_):
    '''Encodes a unicode object as UTF-8. Any other value is returned
    unchanged.'''
    if not isinstance(str_, unicode): return str_
    return str_.encode('utf_8')
116
117 1295 aaronmk
# Stream reader factory for UTF-8, as returned by codecs.getreader()
unicode_reader = codecs.getreader('utf_8')
118
119 73 aaronmk
def to_unicode(str_):
    '''
    Decodes a byte string to a unicode object, trying UTF-8 first and falling
    back to Latin-1. unicode objects are returned unchanged.
    @param str_ A unicode object or an encoded byte string
    @return The unicode equivalent of str_
    @raise AssertionError if no encoding in the list could decode str_
    '''
    if isinstance(str_, unicode): return str_
    encodings = ['utf_8', 'latin_1']
    for encoding in encodings:
        try: return unicode(str_, encoding)
        except UnicodeDecodeError: pass # try the next encoding
    # Only reached if the last encoding in the list also failed to decode
    raise AssertionError(encoding+' is not a catch-all encoding')
126 340 aaronmk
127 3748 aaronmk
def ustr(value):
    '''Like built-in str() but converts to unicode object. Values that
    util.is_str() accepts are converted directly; other values go through
    their __str__() method if they have one, else through str().'''
    if util.is_str(value): return to_unicode(value) # already a string
    if hasattr(value, '__str__'): return to_unicode(value.__str__())
    return to_unicode(str(value))
133 1232 aaronmk
134 3747 aaronmk
def urepr(value):
    '''Like built-in repr() but converts to unicode object. Uses the value's
    __repr__() method if it has one, else repr().'''
    if hasattr(value, '__repr__'): return to_unicode(value.__repr__())
    return to_unicode(repr(value))
139
140 2502 aaronmk
def repr_no_u(value):
    '''Like built-in repr() but removes the "u" in `u'...'`. Only a leading
    "u" that is directly followed by a single quote is removed.'''
    repr_ = urepr(value)
    return re.sub(r"^u(?=')", r'', repr_)
143 2502 aaronmk
144 1401 aaronmk
##### Line endings
145
146 1622 aaronmk
def extract_line_ending(line):
    '''
    Splits a line into its contents and its trailing line ending. A final LF
    is removed first, then a final CR.
    @return tuple (contents, ending)
    '''
    without_lf = remove_suffix('\n', line)
    contents = remove_suffix('\r', without_lf)
    ending = line[len(contents):] # whatever was stripped off the end
    return (contents, ending)
150 714 aaronmk
151 1622 aaronmk
def remove_line_ending(line):
    '''@return The contents part of extract_line_ending(line)'''
    parts = extract_line_ending(line)
    return parts[0]
152
153
def ensure_newl(str_):
    '''Replaces the string's line ending, if any, with a single LF, using
    remove_line_ending() to strip the existing one.'''
    contents = remove_line_ending(str_)
    return contents+'\n'
154
155 856 aaronmk
def is_multiline(str_):
    '''Whether the first newline in str_ comes before its last character'''
    first_newl = str_.find('\n')
    if first_newl < 0: return False # no newline at all
    return first_newl != len(str_)-1 # has newline before end
158
159 860 aaronmk
def remove_extra_newl(str_):
    '''Strips trailing newlines from a string that is_multiline() reports as
    single-line. Multiline strings are returned unchanged.'''
    if not is_multiline(str_): return str_.rstrip('\n')
    return str_
162 856 aaronmk
163 714 aaronmk
def std_newl(str_):
    '''Converts CRLF and lone CR line endings to LF'''
    crlf_fixed = str_.replace('\r\n', '\n') # CRLF first, so it gives one LF
    return crlf_fixed.replace('\r', '\n')
164
165 3255 aaronmk
def join_lines(lines):
    '''Concatenates the lines, ending each one (including the last) with a
    newline'''
    return ''.join([line+'\n' for line in lines])
166
167 1401 aaronmk
##### Whitespace
168
169 714 aaronmk
def cleanup(str_):
    '''Strips surrounding whitespace, then standardizes the line endings with
    std_newl()'''
    stripped = str_.strip()
    return std_newl(stripped)
170 861 aaronmk
171 1364 aaronmk
def single_space(str_):
    '''Strips surrounding whitespace and collapses every run of two or more
    spaces into a single space'''
    trimmed = str_.strip()
    return re.sub(r' {2,}', r' ', trimmed)
172
173 861 aaronmk
def one_line(str_):
    '''Runs cleanup() on the string, then replaces each newline, together with
    any spaces right after it, with a single space'''
    cleaned = cleanup(str_)
    return re.sub(r'\n *', r' ', cleaned)
174 1761 aaronmk
175
##### Control characters
176
177
def is_ctrl(char):
    '''Whether char is a (non-printable) control character, i.e. its code is
    below 32 and it is not whitespace'''
    if ord(char) >= 32: return False
    return not char.isspace()
180
181
def strip_ctrl(str_):
    '''Strips (non-printable) control characters, as determined by is_ctrl()'''
    return ''.join(char for char in str_ if not is_ctrl(char))
184 2471 aaronmk
185 3172 aaronmk
##### Text
186
187
def first_word(str_):
    '''@return Everything before the first space, or the whole string if it
    contains no space'''
    head, sep, rest = str_.partition(' ')
    return head
188
189 2471 aaronmk
##### Formatting
190
191 3466 aaronmk
def indent(str_, level=1, indent_str='    '):
    '''
    Prefixes every line of a string with indentation.
    Trailing whitespace of the whole string is stripped first, and the result
    always ends in exactly one newline.
    @param level How many times indent_str is repeated in front of each line
    @param indent_str The text for one level of indentation
    '''
    prefix = indent_str*level
    lines = str_.rstrip().split('\n')
    indented = [prefix+line for line in lines]
    return '\n'.join(indented)+'\n'
194
195 2477 aaronmk
def as_tt(str_):
    '''Wraps a string in a pair of @ characters (presumably the Redmine
    inline-code markup, going by the other formatting functions here)'''
    return ''.join(('@', str_, '@'))
196
197 2475 aaronmk
def as_code(str_, lang=None, multiline=True):
    '''
    Wraps a string in Redmine tags to syntax-highlight it.
    @param str_ The code. Trailing newlines are stripped and the text is then
        put on lines of its own inside the tags.
    @param lang If given, the code is wrapped in a <code> tag with this class
    @param multiline Whether to also wrap everything in a <pre> tag
    @return The wrapped string
    '''
    str_ = '\n'+str_.rstrip('\n')+'\n'
    if lang is not None: str_ = '<code class="'+lang+'">'+str_+'</code>'
    if multiline: str_ = '<pre>'+str_+'</pre>'
    return str_
203 2477 aaronmk
204 2504 aaronmk
def as_inline_table(dict_, key_label='Output', value_label='Input', ustr=ustr):
    '''
    Wraps a dict in Redmine tags to format it as a table: a <pre> block with
    one "key: value" line per entry, below a header line made of the labels.
    @param dict_ The entries to list
    @param key_label Header text for the keys
    @param value_label Header text for the values
    @param ustr The function used to convert each key and value to a string
    '''
    def row(cells): return (': '.join(cells))+'\n'
    rows = [row([key_label, value_label])]
    for entry in dict_.iteritems():
        rows.append(row([ustr(v) for v in entry]))
    return '<pre>\n'+''.join(rows)+'</pre>'
211 2482 aaronmk
212 2504 aaronmk
def as_table(dict_, key_label='Output', value_label='Input', ustr=ustr):
    '''
    Wraps a dict in Redmine tags to format it as a table: one |-delimited row
    per entry, below a header row made of the labels. Each key and value is
    wrapped with as_tt(); the labels are not.
    @param dict_ The entries to list
    @param key_label Header text for the keys
    @param value_label Header text for the values
    @param ustr The function used to convert each key and value to a string
    '''
    def row(cells):
        # the empty first and last members produce the outer border
        return ('|'.join(['']+cells+['']))+'\n'
    rows = [row([key_label, value_label])]
    for entry in dict_.iteritems():
        rows.append(row([as_tt(ustr(v)) for v in entry]))
    # trailing space protects last \n so blank line ends table
    return '\n'+''.join(rows)+' '