Revision 4116
Added by Aaron Marcuse-Kubitza over 12 years ago
lib/strings.py | ||
---|---|---|
23 | 23 |
return len(to_raw_str(str_))-len(str_) |
24 | 24 |
|
25 | 25 |
def concat(str0, str1, max_len): |
26 |
# Use to_unicode so that substring does not split Unicode characters |
|
27 |
str0, str1 = map(to_unicode, [str0, str1]) |
|
28 |
# Use to_raw_str() because Unicode characters can be multi-byte, and length |
|
29 |
# limits often apply to the byte length, not the character length. |
|
30 |
return str0[:max_len-len(to_raw_str(str1))]+str1 |
|
26 |
'''Concatenates two strings, ensuring that the combined byte length is no |
|
27 |
greater than the provided length limit. |
|
28 |
Note that if the combined length is longer than max_len, the truncated |
|
29 |
length may sometimes be shorter than max_len if there are multi-byte |
|
30 |
characters after str0's truncation point. Thus, you *cannot determine if the |
|
31 |
string was truncated* by checking if the new length equals max_len. |
|
32 |
''' |
|
33 |
# Use raw_extra_len() because Unicode characters can be multi-byte, and |
|
34 |
# length limits often apply to the byte length, not the character length. |
|
35 |
max_len -= raw_extra_len(str0)+raw_extra_len(str1) |
|
36 |
return str0[:max_len-len(str1)]+str1 |
|
31 | 37 |
|
32 | 38 |
def split(sep, str_): |
33 | 39 |
'''Returns [] if str_ == ""''' |
Also available in: Unified diff
strings.py: concat(): Apply length limits by shrinking max_len by the new raw_extra_len() of each string. This also fixes a bug where multi-byte characters in str0 were not properly taken into account, leading to overly long strings. Added doc comment.