# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""An extensible ASCII table reader and writer.
fixedwidth.py:
Read or write a table with fixed width columns.
:Copyright: Smithsonian Astrophysical Observatory (2011)
:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
"""
from __future__ import absolute_import, division, print_function
from ...extern.six.moves import zip
from ...table.column import col_getattr
from . import core
from .core import InconsistentTableError, DefaultSplitter
from . import basic
[docs]class FixedWidthSplitter(core.BaseSplitter):
"""
Split line based on fixed start and end positions for each ``col`` in
``self.cols``.
This class requires that the Header class will have defined ``col.start``
and ``col.end`` for each column. The reference to the ``header.cols`` gets
put in the splitter object by the base Reader.read() function just in time
for splitting data lines by a ``data`` object.
Note that the ``start`` and ``end`` positions are defined in the pythonic
style so line[start:end] is the desired substring for a column. This splitter
class does not have a hook for ``process_lines`` since that is generally not
useful for fixed-width input.
"""
delimiter_pad = ''
bookend = False
delimiter = '|'
[docs] def __call__(self, lines):
for line in lines:
vals = [line[x.start:x.end] for x in self.cols]
if self.process_val:
yield [self.process_val(x) for x in vals]
else:
yield vals
[docs] def join(self, vals, widths):
pad = self.delimiter_pad or ''
delimiter = self.delimiter or ''
padded_delim = pad + delimiter + pad
if self.bookend:
bookend_left = delimiter + pad
bookend_right = pad + delimiter
else:
bookend_left = ''
bookend_right = ''
vals = [' ' * (width - len(val)) + val for val, width in zip(vals, widths)]
return bookend_left + padded_delim.join(vals) + bookend_right
class FixedWidthHeaderSplitter(DefaultSplitter):
'''Splitter class that splits on ``|``.'''
delimiter = '|'
[docs]class FixedWidthHeader(basic.BasicHeader):
"""
Fixed width table header reader.
"""
splitter_class = FixedWidthHeaderSplitter
""" Splitter class for splitting data lines into columns """
position_line = None # secondary header line position
""" row index of line that specifies position (default = 1) """
set_of_position_line_characters = set(r'`~!#$%^&*-_+=\|":' + "'")
[docs] def get_line(self, lines, index):
for i, line in enumerate(self.process_lines(lines)):
if i == index:
break
else: # No header line matching
raise InconsistentTableError('No header line found in table')
return line
[docs] def get_cols(self, lines):
"""
Initialize the header Column objects from the table ``lines``.
Based on the previously set Header attributes find or create the column names.
Sets ``self.cols`` with the list of Columns.
Parameters
----------
lines : list
List of table lines
"""
# See "else" clause below for explanation of start_line and position_line
start_line = core._get_line_index(self.start_line, self.process_lines(lines))
position_line = core._get_line_index(self.position_line, self.process_lines(lines))
# If start_line is none then there is no header line. Column positions are
# determined from first data line and column names are either supplied by user
# or auto-generated.
if start_line is None:
if position_line is not None:
raise ValueError("Cannot set position_line without also setting header_start")
data_lines = self.data.process_lines(lines)
if not data_lines:
raise InconsistentTableError(
'No data lines found so cannot autogenerate column names')
vals, starts, ends = self.get_fixedwidth_params(data_lines[0])
self.names = [self.auto_format % i for i in range(1, len(vals) + 1)]
else:
# This bit of code handles two cases:
# start_line = <index> and position_line = None
# Single header line where that line is used to determine both the
# column positions and names.
# start_line = <index> and position_line = <index2>
# Two header lines where the first line defines the column names and
# the second line defines the column positions
if position_line is not None:
# Define self.col_starts and self.col_ends so that the call to
# get_fixedwidth_params below will use those to find the header
# column names. Note that get_fixedwidth_params returns Python
# slice col_ends but expects inclusive col_ends on input (for
# more intuitive user interface).
line = self.get_line(lines, position_line)
if len(set(line) - set([self.splitter.delimiter, ' '])) != 1:
raise InconsistentTableError('Position line should only contain delimiters and one other character, e.g. "--- ------- ---".')
# The line above lies. It accepts white space as well.
# We don't want to encourage using three different
# characters, because that can cause ambiguities, but white
# spaces are so common everywhere that practicality beats
# purity here.
charset = self.set_of_position_line_characters.union(set([self.splitter.delimiter, ' ']))
if not set(line).issubset(charset):
raise InconsistentTableError('Characters in position line must be part of {0}'.format(charset))
vals, self.col_starts, col_ends = self.get_fixedwidth_params(line)
self.col_ends = [x - 1 for x in col_ends]
# Get the header column names and column positions
line = self.get_line(lines, start_line)
vals, starts, ends = self.get_fixedwidth_params(line)
self.names = vals
self._set_cols_from_names()
# Set column start and end positions.
for i, col in enumerate(self.cols):
col.start = starts[i]
col.end = ends[i]
[docs] def get_fixedwidth_params(self, line):
"""
Split ``line`` on the delimiter and determine column values and
column start and end positions. This might include null columns with
zero length (e.g. for ``header row = "| col1 || col2 | col3 |"`` or
``header2_row = "----- ------- -----"``). The null columns are
stripped out. Returns the values between delimiters and the
corresponding start and end positions.
Parameters
----------
line : str
Input line
Returns
-------
vals : list
List of values.
starts : list
List of starting indices.
ends : list
List of ending indices.
"""
# If column positions are already specified then just use those, otherwise
# figure out positions between delimiters.
if self.col_starts is not None and self.col_ends is not None:
starts = list(self.col_starts) # could be any iterable, e.g. np.array
ends = [x + 1 for x in self.col_ends] # user supplies inclusive endpoint
if len(starts) != len(ends):
raise ValueError('Fixed width col_starts and col_ends must have the same length')
vals = [line[start:end].strip() for start, end in zip(starts, ends)]
else:
# There might be a cleaner way to do this but it works...
vals = line.split(self.splitter.delimiter)
starts = [0]
ends = []
for val in vals:
if val:
ends.append(starts[-1] + len(val))
starts.append(ends[-1] + 1)
else:
starts[-1] += 1
starts = starts[:-1]
vals = [x.strip() for x in vals if x]
if len(vals) != len(starts) or len(vals) != len(ends):
raise InconsistentTableError('Error parsing fixed width header')
return vals, starts, ends
[docs] def write(self, lines):
# Header line not written until data are formatted. Until then it is
# not known how wide each column will be for fixed width.
pass
[docs]class FixedWidthData(basic.BasicData):
"""
Base table data reader.
"""
splitter_class = FixedWidthSplitter
""" Splitter class for splitting data lines into columns """
[docs] def write(self, lines):
vals_list = []
col_str_iters = self.str_vals()
for vals in zip(*col_str_iters):
vals_list.append(vals)
for i, col in enumerate(self.cols):
col.width = max([len(vals[i]) for vals in vals_list])
if self.header.start_line is not None:
col.width = max(col.width, len(col_getattr(col, 'name')))
widths = [col.width for col in self.cols]
if self.header.start_line is not None:
lines.append(self.splitter.join([col_getattr(col, 'name') for col in self.cols],
widths))
if self.header.position_line is not None:
char = self.header.position_char
if len(char) != 1:
raise ValueError('Position_char="%s" must be a single character' % char)
vals = [char * col.width for col in self.cols]
lines.append(self.splitter.join(vals, widths))
for vals in vals_list:
lines.append(self.splitter.join(vals, widths))
return lines
[docs]class FixedWidth(basic.Basic):
"""
Read or write a fixed width table with a single header line that defines column
names and positions. Examples::
# Bar delimiter in header and data
| Col1 | Col2 | Col3 |
| 1.2 | hello there | 3 |
| 2.4 | many words | 7 |
# Bar delimiter in header only
Col1 | Col2 | Col3
1.2 hello there 3
2.4 many words 7
# No delimiter with column positions specified as input
Col1 Col2Col3
1.2hello there 3
2.4many words 7
See the :ref:`fixed_width_gallery` for specific usage examples.
"""
_format_name = 'fixed_width'
_description = 'Fixed width'
header_class = FixedWidthHeader
data_class = FixedWidthData
def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
super(FixedWidth, self).__init__()
self.data.splitter.delimiter_pad = delimiter_pad
self.data.splitter.bookend = bookend
self.header.col_starts = col_starts
self.header.col_ends = col_ends
class FixedWidthNoHeaderHeader(FixedWidthHeader):
'''Header reader for fixed with tables with no header line'''
start_line = None
class FixedWidthNoHeaderData(FixedWidthData):
'''Data reader for fixed width tables with no header line'''
start_line = 0
[docs]class FixedWidthNoHeader(FixedWidth):
"""
Read or write a fixed width table which has no header line. Column
names are either input (``names`` keyword) or auto-generated. Column
positions are determined either by input (``col_starts`` and ``col_stops``
keywords) or by splitting the first data line. In the latter case a
``delimiter`` is required to split the data line.
Examples::
# Bar delimiter in header and data
| 1.2 | hello there | 3 |
| 2.4 | many words | 7 |
# Compact table having no delimiter and column positions specified as input
1.2hello there3
2.4many words 7
This class is just a convenience wrapper around the ``FixedWidth`` reader
but with ``header.start_line = None`` and ``data.start_line = 0``.
See the :ref:`fixed_width_gallery` for specific usage examples.
"""
_format_name = 'fixed_width_no_header'
_description = 'Fixed width with no header'
header_class = FixedWidthNoHeaderHeader
data_class = FixedWidthNoHeaderData
def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
super(FixedWidthNoHeader, self).__init__(col_starts, col_ends,
delimiter_pad=delimiter_pad, bookend=bookend)
class FixedWidthTwoLineHeader(FixedWidthHeader):
'''Header reader for fixed width tables splitting on whitespace.
For fixed width tables with several header lines, there is typically
a white-space delimited format line, so splitting on white space is
needed.
'''
splitter_class = DefaultSplitter
class FixedWidthTwoLineDataSplitter(FixedWidthSplitter):
'''Splitter for fixed width tables splitting on ``' '``.'''
delimiter = ' '
class FixedWidthTwoLineData(FixedWidthData):
'''Data reader for fixed with tables with two header lines.'''
splitter_class = FixedWidthTwoLineDataSplitter
[docs]class FixedWidthTwoLine(FixedWidth):
"""
Read or write a fixed width table which has two header lines. The first
header line defines the column names and the second implicitly defines the
column positions. Examples::
# Typical case with column extent defined by ---- under column names.
col1 col2 <== header_start = 0
----- ------------ <== position_line = 1, position_char = "-"
1 bee flies <== data_start = 2
2 fish swims
# Pretty-printed table
+------+------------+
| Col1 | Col2 |
+------+------------+
| 1.2 | "hello" |
| 2.4 | there world|
+------+------------+
See the :ref:`fixed_width_gallery` for specific usage examples.
"""
_format_name = 'fixed_width_two_line'
_description = 'Fixed width with second header line'
data_class = FixedWidthTwoLineData
header_class = FixedWidthTwoLineHeader
def __init__(self, position_line=1, position_char='-', delimiter_pad=None, bookend=False):
super(FixedWidthTwoLine, self).__init__(delimiter_pad=delimiter_pad, bookend=bookend)
self.header.position_line = position_line
self.header.position_char = position_char
self.data.start_line = position_line + 1