Revision 8202
Added by Aaron Marcuse-Kubitza over 11 years ago
csvs.py | ||
---|---|---|
36 | 36 |
|
37 | 37 |
return dialect |
38 | 38 |
|
39 |
def has_unbalanced_quotes(str_):
    '''Tells whether str_ contains an odd number of double-quote (") characters.
    @return bool True if the count of " in str_ is odd, False otherwise'''
    quote_count = str_.count('"')
    return quote_count % 2 != 0 # a nonzero remainder means an odd # of "
|
40 |
|
|
41 |
def has_multiline_column(str_):
    '''Tells whether str_ appears to end inside a quoted column.
    Delegates to has_unbalanced_quotes(): an odd number of " in str_ is treated
    as a sign that a quoted column continues past the end of str_.
    @return bool the result of has_unbalanced_quotes(str_)'''
    return has_unbalanced_quotes(str_)
|
42 |
|
|
39 | 43 |
def stream_info(stream, parse_header=False): |
40 | 44 |
'''Automatically detects the dialect based on the header line. |
41 | 45 |
Uses the Excel dialect if the CSV file is empty. |
42 | 46 |
@return NamedTuple {header_line, header, dialect}''' |
43 | 47 |
info = util.NamedTuple() |
44 | 48 |
info.header_line = stream.readline() |
49 |
if has_multiline_column(info.header_line): # 1st line not full header |
|
50 |
# assume it's a header-only csv with multiline columns |
|
51 |
info.header_line += ''.join(stream.readlines()) # use entire file |
|
45 | 52 |
info.header = None |
46 | 53 |
if info.header_line != '': |
47 | 54 |
info.dialect = sniff(info.header_line) |
Also available in: Unified diff
lib/csvs.py: stream_info(): Fixed bug where headers with multiline columns were not supported because only the first line (not the first multiline row) was sniffed for the dialect