#!/usr/bin/python3

from collections import defaultdict

from span import *

__all__ = 'markup_table save_table_class_script'.split()

# The table markup format is described in The Dauug House under
# "Tables ... Basic markup for tables"
#
# block[0] defines the column start positions and the remaining lines hold
# the table data. current_specials must map the Unicode MIDDLE DOT to the
# cell delimiter currently in effect. The return value is the table's HTML.
#
# A table without any columns (empty block, or a blank first line) or
# without any data rows is rendered as an empty <table>; the table class
# script is not run for it because there is nothing to select.
def markup_table(block: list[str], current_specials: dict[str, str]) -> str:
    starts: list[int] = []          # column start positions
    cells: list[list[str]] = []     # rows and columns of table text

    # The first line defines column start positions: a column begins at
    # every non-space character that differs from the character before it.
    csp_line = block[0] if block else ''
    prev_char = None
    for i, c in enumerate(csp_line):
        if c != prev_char and not c.isspace():
            starts.append(i)
        prev_char = c
    n_cols = len(starts)

    # The default cell delimiter is a Unicode MIDDLE DOT. We can change it.
    delim = current_specials['\u00b7']

    # Here is 'starts' with an appended None. It's for slicing.
    ranges: list[int | None] = starts + [None]

    # Subsequent lines contain table data. Without columns there is nowhere
    # to put data (and split() below would get maxsplit=-1, i.e. unlimited),
    # so such a table keeps no cells at all.
    for line in (block[1:] if n_cols else []):
        if not line:
            continue                # pretty blank line
        if line == csp_line:
            continue                # pretty dashes or something

        if delim in line:
            # Delimited line: separate on delimiters, clamped to the stated
            # number of columns. Trim whitespace, then insert empty cells
            # if needed to fill the row.
            row = [cell.strip() for cell in line.split(delim, n_cols - 1)]
            row.extend([''] * (n_cols - len(row)))
        else:
            # Not delimited, so slice at the column start positions instead.
            # Do strip whitespace from each cell.
            row = [line[lo:hi].strip() for lo, hi in zip(starts, ranges[1:])]
        cells.append(row)

    # Determine what classes go where for this table. The script may also
    # edit 'cells' in place, so this has to happen before the HTML is built.
    if cells:
        TABLE, TR, TD = run_table_class_script(cells, tc_script, tc_space)
    else:
        TABLE, TR, TD = {}, {}, {}

    # All the cells are here. Let's make the HTML.
    html = ['<div class="scrolly"><table%s>' % _class_attr(TABLE, True)]
    for r, row in enumerate(cells):
        html.append('  <tr%s>' % _class_attr(TR, r))
        for c, col in enumerate(row):
            html.append('    <td%s>%s</td>' % (
                _class_attr(TD, (c, r)), span(current_specials, col)))
        html.append('  </tr>')
    html.append('</table></div>\n')

    return '\n'.join(html)


# Format the class attribute for one tag: ' class="..."' when 'classes' has
# a non-empty entry for 'key', otherwise the empty string.
def _class_attr(classes: dict, key) -> str:
    names = classes.get(key)
    return ' class="%s"' % names if names else ''

# Module state shared with the table renderer: the tokens of the most
# recently saved table class script, and the spacing character that was
# in effect when that script was saved.
tc_script: list[str] = []
tc_space: str = '\u00b7'

# Tokenize a 'T' block on whitespace and store the tokens in 'tc_script',
# replacing whatever script was saved before. Also remember the current
# MIDDLE DOT special in 'tc_space'.
def save_table_class_script(block: list[str], current_specials: dict[str, str]):
    global tc_space
    tc_space = current_specials['\u00b7']
    # Slice assignment keeps the same list object, so every holder of a
    # reference to tc_script sees the new tokens.
    tc_script[:] = [token for line in block for token in line.split()]

# Parse column-row "spreadsheet" notation, clamping output range within
# 1..cols, 1..rows. Output is (col, row) or (None, None), depending on whether
# input is valid. The output is zero-based, so "b5" will return (1, 4).
# Not case sensitive. Some examples of valid input are:
#
#     3  C  c15  aB5  J007
#
# Column will be A if not specified. Row will be 1 if not specified.
#
def parse_a1(s:str, cols: int, rows: int) -> tuple[int | None, int | None]:

    # Split s into a letters and digits portion.
    lets, digs = '', s.lower()
    while digs[:1].islower():
        digs, lets = digs[1:], lets + digs[0]
    if digs and not digs.isdigit():
        return None, None               # not in A1 format

    # Parsing the digits is easy. Go for zero-based.
    row = int(digs) - 1 if digs else 0

    # Parse the letters in base 26. Result is also zero-based.
    col = 0
    while lets:
        col = 26 * col + ord(lets[0]) - 97
        lets = lets[1:]

    # Clamp result to be within table. Neither cols nor rows is 0.
    col = min(col, cols - 1)
    row = min(row, rows - 1)
    return (col, row)

# Given a table and a table class script, produce dicts of what classes
# get applied to <table>, <tr>, and <td> tags. Dicts are
# output in that order. The caller only requires a space-separated list
# for each, so that's how everything is returned. Each list is
# deduplicated, keeping the first occurrence of every class.
#
# The <table> dict has the single possible key True, the <tr> dict is keyed
# by row index, and the <td> dict is keyed by (col, row). A table with no
# rows or no columns yields three empty dicts.
#
# When the script uses /del, 'table' is modified in place: the search text
# that selected cells is removed from every cell at the next /mark.
#
# Note that <colgroup> is not okay for putting styles on columns, and
# the nth-child selector requires a hard-coded position in the CSS.
# So the only option left is to put the style on every last <td>.
#
# The script format is described in The Dauug House under
# "Tables ... Advanced markup for tables"
def run_table_class_script(
    table: list[list[str]],
    script: list[str],
    space_char: str
) -> tuple[
    dict[bool, str],
    dict[int, str],
    dict[tuple[int, int], str]
]:
    # Nothing can be selected in a table without rows or columns, and
    # table[0] below would not even exist for a table without rows.
    if not table or not table[0]:
        return {}, {}, {}

    # Table dimensions.
    n_cols, n_rows = len(table[0]), len(table)

    # These are lists of classes for each element type, stratified by row
    # and/or column as appropriate. They will be deduplicated and strung
    # before being returned.
    table_classes: dict[bool, list[str]] = defaultdict(list)
    tr_classes: dict[int, list[str]] = defaultdict(list)
    td_classes: dict[tuple[int, int], list[str]] = defaultdict(list)

    # Intermediate work. All of it is reset to empty after each /mark.
    tables: set[bool] = set()           # is table selected?
    rows: set[int] = set()              # set of selected rows
    cols: set[int] = set()              # set of selected columns
    cells: set[tuple[int, int]] = set() # set of selected cells
    stage: list[str] = []               # tokens for later command
    delete: list[str] = []              # search text to delete
    del_search_text = False             # delete search text?

    for t in script:                    # t is a script token.

        # Non-commands get pushed on the stage.
        if not t.startswith('/'):
            stage.append(t)
            continue

        #
        # This section is for commands that don't touch the stage.
        #

        if t == '/del':                 # delete search from cells after use
            del_search_text = True
            continue

        # /mark consumes the stage as a list of styles to include or exclude,
        # adding contents to the selected tables, rows, or cells (columns
        # were already projected onto cells by /col).
        if t == '/mark':
            for true in tables:
                in_or_out(table_classes, true, stage)
            for r in rows:
                in_or_out(tr_classes, r, stage)
            for c, r in cells:
                in_or_out(td_classes, (c, r), stage)

            # Delete search text, which presumably was for identification only.
            if del_search_text:
                for r in range(n_rows):
                    for c in range(n_cols):
                        for bye in delete:
                            table[r][c] = table[r][c].replace(bye, '')

            # Start over with empty selections for the next group.
            tables.clear()
            rows.clear()
            cols.clear()
            cells.clear()
            stage.clear()
            delete.clear()
            del_search_text = False
            continue

        #
        # Commands below consume the stage for cell identification, either
        # by finding search text within cells or by A1-style positioning.
        #

        for el in stage:

            # Does el identify a cell in A1 format?
            col, row = parse_a1(el, n_cols, n_rows)
            if col is not None:
                # Neither is row None. This is a cell. Add to the cell group.
                cells.add((col, row))
                continue

            # el isn't A1 format, but literal text to find within all cells.
            # Start by processing any spacing markup so that embedded spaces
            # can be used in search. Leading and trailing space search is
            # not supported. XXX
            needle = el.replace(space_char, ' ').strip()

            # Find every cell containing this text, and add to cells.
            for c in range(n_cols):
                for r in range(n_rows):
                    if needle in table[r][c]:
                        cells.add((c, r))

            # Save this search text so we can delete it later if requested.
            delete.append(needle)

        # Contents of 'stage' have been moved to 'cells', so free 'stage'.
        stage.clear()

        #
        # Here are the commands that use what was on the stage.
        #

        if t == '/table':               # project cells to a containing table
            if cells:
                tables.add(True)
            cells.clear()

        elif t == '/row':               # project cells to a set of rows
            rows.update(r for _, r in cells)
            cells.clear()

        elif t == '/col':               # project cells to a set of columns,
            cols.update(c for c, _ in cells)    # which due to limitations
            cells.clear()                       # of CSS, has to project
            for c in cols:                      # back onto cells
                for r in range(n_rows):
                    cells.add((c, r))

        elif t == '/cell':              # important side effect: stage.clear()
            pass

        elif t == '/bb':                # project cells to their bounding box
            if cells:
                c_min = min(c for c, _ in cells)
                c_max = max(c for c, _ in cells)
                r_min = min(r for _, r in cells)
                r_max = max(r for _, r in cells)
                for c in range(c_min, 1 + c_max):
                    for r in range(r_min, 1 + r_max):
                        cells.add((c, r))

        else:
            oops('invalid table class markup: %s' % t)

    # Build class strings for <table>, <tr>, and <td>. (There is only one
    # <table>; it has key True.) dict.fromkeys() drops repeated classes
    # while keeping them in the order they were first added.
    table_out = {k: ' '.join(dict.fromkeys(v)) for k, v in table_classes.items()}
    tr_out = {k: ' '.join(dict.fromkeys(v)) for k, v in tr_classes.items()}
    td_out = {k: ' '.join(dict.fromkeys(v)) for k, v in td_classes.items()}

    return table_out, tr_out, td_out

# Apply a list of class tokens to dic[key]: a plain token like 'this' is
# appended, while a token like '-this' strips every 'this' already present.
# A removal for a key that has no entry yet leaves dic without that key.
def in_or_out(dic, key, classes):
    for token in classes:
        if not token.startswith('-'):
            dic[key].append(token)
            continue
        unwanted = token[1:]
        if key in dic:
            # Filter in place so the list object stored in dic stays the same.
            dic[key][:] = [name for name in dic[key] if name != unwanted]

