Reticulum/RNS/Utilities/rngit/util.py

# Reticulum License
#
# Copyright (c) 2016-2026 Mark Qvist
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# - The Software shall not be used in any kind of system which includes amongst
#   its functions the ability to purposefully do harm to human beings.
#
# - The Software shall not be used, directly or indirectly, in the creation of
#   an artificial intelligence, machine learning or language model training
#   dataset, including but not limited to any use that contributes to the
#   training or development of such a model or algorithm.
#
# - The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
import RNS

class MarkdownToMicron:
    BOLD = "`!"
    BOLD_END = "`!"
    ITALIC = "`*"
    ITALIC_END = "`*"
    UNDERLINE = "`_"
    UNDERLINE_END = "`_"

    CODE_BG = "`BT282828"
    CODE_BG_INLINE = "`BT383838"
    CODE_FG = "`Fddd"
    CODE_RESET = "`f`b"

    LITERAL_START = "`="
    LITERAL_END = "`="

    BULLET = "•"

    # Regex patterns for markdown elements
    HEADER_RE = re.compile(r'^(#{1,6})\s+(.+)$')
    CODE_FENCE_RE = re.compile(r'^(\s*)```(.*)$')
    HORIZONTAL_RULE_RE = re.compile(r'^(\s*)(---+|===+|\*\*\*+|___+)\s*$')
    UNORDERED_LIST_RE = re.compile(r'^(\s*)([-*+])\s+(.+)$')

    # Table patterns
    TABLE_ROW_RE = re.compile(r'^\s*\|?(.+?)\|?\s*$')
    TABLE_SEP_RE = re.compile(r'^\s*\|?(?:\s*:?-+:?\s*\|)+\s*$')

    # Quote pattern
    QUOTE_RE = re.compile(r'^>\s?(.*)$')

    # Inline patterns (processed in order of specificity)
    LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
    INLINE_CODE_RE = re.compile(r'`([^`]+)`')
    BOLD_RE = re.compile(r'\*\*(.+?)\*\*|__(.+?)__')
    ITALIC_RE = re.compile(r'\*(.+?)\*|_(.+?)_')

    TABLE_H = "─"
    TABLE_V = "│"
    TABLE_TL = "┌"
    TABLE_TR = "┐"
    TABLE_BL = "└"
    TABLE_BR = "┘"
    TABLE_ML = "├"
    TABLE_MR = "┤"
    TABLE_TM = "┬"
    TABLE_BM = "┴"
    TABLE_MM = "┼"

    TABLE_MIN_COL_WIDTH = 3

    def __init__(self, max_width=100, syntax_highlighter=None, url_scope=None):
        self.max_width = max_width
        self.local_url_scope = url_scope or ":/page/"
        self.__local_url_scope = self.local_url_scope
        self.syntax_highlighter = syntax_highlighter
        self.wcwidth = None

        try:
            import wcwidth
            self.wcwidth = wcwidth

        except: RNS.log(f"The wcwidth module is unavailable, display width calculations for some glyphs will be incorrect", RNS.LOG_WARNING)

    def set_url_scope(self, url_scope): self.local_url_scope = url_scope
    def restore_url_scope(self, url_scope): self.local_url_scope = self.__local_url_scope

    def display_width(self, text):
        if not self.wcwidth: return len(text)
        else:
            # wcswidth returns -1 for non-printable strings,
            # fallback to len in this case
            w = self.wcwidth.wcswidth(text)
            return w if w is not None and w >= 0 else len(text)

    def format_block(self, text, url_scope=None):
        lines = text.split('\n')
        result_lines = []
        in_code_block = False
        code_block_lang = None
        code_buffer = []
        in_table = False
        table_buffer = []
        in_quote = False
        quote_buffer = []

        def flush_quote_buffer():
            nonlocal result_lines, quote_buffer, in_quote
            if not quote_buffer:
                in_quote = False
                return

            para = " ".join(quote_buffer)
            formatted = self._format_inline(para)

            effective_width = self.max_width - 3
            if effective_width < 1: effective_width = 1
            wrapped_lines = self._wrap_text(formatted, effective_width)
            for wrapped_line in wrapped_lines: result_lines.append(f" │ {wrapped_line}")

            quote_buffer = []
            in_quote = False

        def flush_table_buffer():
            nonlocal result_lines, table_buffer, in_table
            if not table_buffer:
                in_table = False
                return

            if len(table_buffer) >= 2 and self._is_table_separator(table_buffer[1]):
                formatted_lines = self.format_table(table_buffer)
                result_lines.extend(formatted_lines)

            else:
                for line in table_buffer: result_lines.append(self.format_line(line))

            table_buffer = []
            in_table = False

        def flush_code_block():
            nonlocal result_lines, code_buffer, code_block_lang
            if not code_buffer:
                return

            code_content = '\n'.join(code_buffer)

            if self.syntax_highlighter and code_block_lang:
                try:
                    highlighted = self.syntax_highlighter.highlight(code_content, language=code_block_lang)
                    result_lines.append(f"{self.CODE_BG}{self.CODE_FG}")
                    result_lines.append(highlighted)
                    result_lines.append(self.CODE_RESET)

                except Exception:
                    # Fallback to plain literal block on any error
                    result_lines.append(f"{self.CODE_BG}{self.CODE_FG}")
                    result_lines.append(self.LITERAL_START)
                    result_lines.append(self._escape_literals(code_content))
                    result_lines.append(self.LITERAL_END)
                    result_lines.append(self.CODE_RESET)
            else:
                result_lines.append(f"{self.CODE_BG}{self.CODE_FG}")
                result_lines.append(self.LITERAL_START)
                result_lines.append(self._escape_literals(code_content))
                result_lines.append(self.LITERAL_END)
                result_lines.append(self.CODE_RESET)

            code_buffer = []

        for line in lines:
            is_fence, lang_hint = self._detect_code_fence(line)

            if line.startswith("-") and not line.startswith("---") and not line.startswith("- "): line = f"\\{line}"
            if line.startswith(">"): line = f"\\{line}"
            if line.startswith("<"): line = f"\\{line}"

            if is_fence:
                # Flush any pending structures before code fence
                flush_quote_buffer()
                flush_table_buffer()

                if not in_code_block:
                    # Opening fence, start buffering
                    in_code_block = True
                    code_block_lang = lang_hint.strip() if lang_hint else None
                    code_buffer = []

                else:
                    # Closing fence, flush highlighted code
                    flush_code_block()
                    in_code_block = False
                    code_block_lang = None

            else:
                # Buffer code lines for later highlighting
                if in_code_block: code_buffer.append(line)
                else:
                    quote_match = self.QUOTE_RE.match(line)
                    if quote_match:
                        if not in_quote:
                            flush_table_buffer()
                            in_quote = True
                            quote_buffer = []

                        quote_buffer.append(quote_match.group(1))

                    else:
                        if in_quote:
                            flush_quote_buffer()
                            if line.strip() != "":
                                if self._is_table_row(line):
                                    in_table = True
                                    table_buffer = [line]

                                else:
                                    formatted = self.format_line(line)
                                    result_lines.append(formatted)

                            # Pass through blank line as separator
                            else: result_lines.append("")

                        else:
                            if self._is_table_row(line):
                                if not in_table:
                                    in_table = True
                                    table_buffer = [line]

                                else: table_buffer.append(line)

                            else:
                                # Line breaks table, flush buffer
                                if in_table: flush_table_buffer()
                                formatted = self.format_line(line)
                                result_lines.append(formatted)

        # Handle unclosed structures
        if in_quote: flush_quote_buffer()
        if in_table: flush_table_buffer()
        if in_code_block: flush_code_block()

        return '\n'.join(result_lines)

    def format_line(self, line, mode="normal"):
        if mode == "codeblock": return self._escape_literals(line)

        if self.HORIZONTAL_RULE_RE.match(line): return self._format_horizontal_rule()

        header_match = self.HEADER_RE.match(line)
        if header_match: return self._format_header(header_match)

        list_match = self.UNORDERED_LIST_RE.match(line)
        if list_match: return self._format_list_item(list_match)

        line = self._format_inline(line)

        return line

    def _format_inline(self, text):
        code_blocks = []
        def extract_code(match):
            code_blocks.append(match.group(1))
            return f"\x00CODE{len(code_blocks)-1}\x00"

        links = []
        def extract_link(match):
            links.append((match.group(1), match.group(2)))
            return f"\x00LINK{len(links)-1}\x00"

        text = self.LINK_RE.sub(extract_link, text)
        text = self.INLINE_CODE_RE.sub(extract_code, text)
        text = self.BOLD_RE.sub(self._bold_sub, text)
        text = self.ITALIC_RE.sub(self._italic_sub, text)

        def restore_link(match):
            idx = int(match.group(1))
            text, url = links[idx]

            anchor_components = url.split("#")
            url = anchor_components[0]
            anchor = anchor_components[1] if len(anchor_components) > 1 else ""

            if not ":/" in url:
                url = f"{self.local_url_scope}{url}"
                if anchor: url = f"{url}|anchor={anchor}"

            text = text.replace('`', '')
            return f"`!`[{text}`{url}]`!"

        text = re.sub(r'\x00LINK(\d+)\x00', restore_link, text)

        def restore_code(match):
            idx = int(match.group(1))
            content = code_blocks[idx]
            content = content.replace('`', '\\`')
            return f"{self.CODE_BG_INLINE}{self.CODE_FG}{content}{self.CODE_RESET}"

        text = re.sub(r'\x00CODE(\d+)\x00', restore_code, text)
        return text

    def _highlight_inline_code(self, content):
        if not self.syntax_highlighter: return None
        return self.syntax_highlighter.highlight(content, language=None)

    def _bold_sub(self, match):
        content = match.group(1) or match.group(2)
        return f"{self.BOLD}{content}{self.BOLD_END}"

    def _italic_sub(self, match):
        content = match.group(1) or match.group(2)
        return f"{self.ITALIC}{content}{self.ITALIC_END}"

    def _format_header(self, match):
        hashes = match.group(1)
        content = match.group(2)
        level = len(hashes)
        prefix = ">" * min(level, 6)
        return f"{prefix}{self._format_inline(content)}"

    def _format_list_item(self, match):
        indent = match.group(1)
        content = match.group(3)
        content = self._format_inline(content)
        return f"{indent} {self.BULLET} {content}"

    def _format_horizontal_rule(self):
        return "-"

    def _detect_code_fence(self, line):
        match = self.CODE_FENCE_RE.match(line)
        if match:
            # match.group(2) contains everything after the backticks (language hint)
            return True, match.group(2)
        return False, ""

    def _is_table_row(self, line):
        if '|' not in line: return False
        match = self.TABLE_ROW_RE.match(line)
        if match is None: return False
        content = match.group(1)
        return '|' in content or line.strip().startswith('|')

    def _is_table_separator(self, line):
        if '|' not in line: return False
        match = self.TABLE_SEP_RE.match(line)
        return match is not None

    def _escape_literals(self, text):
        return text.replace('`', '\\`')

    def format_table(self, rows, align="c"):
        if len(rows) < 2: return rows

        # Parse header and separator
        header_cells = self._parse_table_row(rows[0])
        alignments = self._parse_table_alignments(rows[1])

        # Ensure alignment count matches header cells
        while len(alignments) < len(header_cells): alignments.append('left')
        alignments = alignments[:len(header_cells)]

        # Parse data rows
        data_rows = []
        for i in range(2, len(rows)):
            cells = self._parse_table_row(rows[i])
            while len(cells) < len(header_cells): cells.append("")
            cells = cells[:len(header_cells)]
            data_rows.append(cells)

        # Calculate column widths based on content
        num_cols = len(header_cells)
        col_widths = [0] * num_cols

        all_rows = [header_cells] + data_rows
        for row in all_rows:
            for i, cell in enumerate(row):
                formatted = self._format_inline(cell)
                width = self._visible_width(formatted)
                col_widths[i] = max(col_widths[i], width)

        # Apply minimum width and calculate total
        col_widths = [max(w, self.TABLE_MIN_COL_WIDTH) for w in col_widths]

        # Check max_width constraint
        # Total = sum of columns + 3 chars per column (space + 2 borders) + 1 for final border
        total_width = sum(col_widths) + (num_cols * 3) + 1

        if total_width > self.max_width:
            # Reduce widest columns proportionally
            excess = total_width - self.max_width
            indexed_widths = [(i, w) for i, w in enumerate(col_widths)]
            indexed_widths.sort(key=lambda x: -x[1])

            for i, w in indexed_widths:
                if excess <= 0: break
                reduction = min(excess, w - self.TABLE_MIN_COL_WIDTH)
                col_widths[i] -= reduction
                excess -= reduction

        # Build formatted table
        result = []

        # Alignment start
        if align: result.append(f"`{align}")

        # Top border
        border = self.TABLE_TL
        for i, w in enumerate(col_widths):
            border += self.TABLE_H * (w + 2)
            if i < len(col_widths) - 1: border += self.TABLE_TM
            else:                       border += self.TABLE_TR

        result.append(self._escape_literals(border))

        # Header row
        header_line = self.TABLE_V
        for i, cell in enumerate(header_cells):
            formatted = self._format_inline(cell)
            padded = self._pad_cell(formatted, col_widths[i], 'left')
            header_line += f" {padded} {self.TABLE_V}"
        result.append(self._escape_literals(header_line))

        # Separator row
        sep_line = self.TABLE_ML
        for i, w in enumerate(col_widths):
            cell_width = w + 2
            sep_line += self.TABLE_H * cell_width

            if i < len(col_widths) - 1: sep_line += self.TABLE_MM
            else:                       sep_line += self.TABLE_MR

        result.append(self._escape_literals(sep_line))

        # Data rows
        for row in data_rows:
            row_line = self.TABLE_V
            for i, cell in enumerate(row):
                formatted = self._format_inline(cell)
                padded = self._pad_cell(formatted, col_widths[i], alignments[i])
                row_line += f" {padded} {self.TABLE_V}"

            result.append(row_line)

        # Bottom border
        border = self.TABLE_BL
        for i, w in enumerate(col_widths):
            border += self.TABLE_H * (w + 2)
            if i < len(col_widths) - 1: border += self.TABLE_BM
            else:                       border += self.TABLE_BR

        result.append(self._escape_literals(border))

        # End alignment
        if align: result.append("`a")

        return result

    def format_table_raw(self, rows, align="c"):
        if len(rows) < 2: return rows

        # Parse header and separator
        header_cells = self._parse_table_row(rows[0])
        alignments = self._parse_table_alignments(rows[1])

        # Ensure alignment count matches header cells
        while len(alignments) < len(header_cells): alignments.append('left')
        alignments = alignments[:len(header_cells)]

        # Parse data rows
        data_rows = []
        for i in range(2, len(rows)):
            cells = self._parse_table_row(rows[i])
            while len(cells) < len(header_cells): cells.append("")
            cells = cells[:len(header_cells)]
            data_rows.append(cells)

        # Calculate column widths based on raw content
        num_cols = len(header_cells)
        col_widths = [0] * num_cols

        all_rows = [header_cells] + data_rows
        for row in all_rows:
            for i, cell in enumerate(row):
                width = self._visible_width(cell)
                col_widths[i] = max(col_widths[i], width)

        # Apply minimum width and calculate total
        col_widths = [max(w, self.TABLE_MIN_COL_WIDTH) for w in col_widths]

        # Check max_width constraint
        total_width = sum(col_widths) + (num_cols * 3) + 1

        if total_width > self.max_width:
            # Reduce widest columns proportionally
            excess = total_width - self.max_width
            indexed_widths = [(i, w) for i, w in enumerate(col_widths)]
            indexed_widths.sort(key=lambda x: -x[1])

            for i, w in indexed_widths:
                if excess <= 0: break
                reduction = min(excess, w - self.TABLE_MIN_COL_WIDTH)
                col_widths[i] -= reduction
                excess -= reduction

        # Build formatted table
        result = []

        # Alignment start
        if align: result.append(f"`{align}")

        # Top border
        border = self.TABLE_TL
        for i, w in enumerate(col_widths):
            border += self.TABLE_H * (w + 2)
            if i < len(col_widths) - 1: border += self.TABLE_TM
            else:                       border += self.TABLE_TR

        result.append(self._escape_literals(border))

        # Header row
        header_line = self.TABLE_V
        for i, cell in enumerate(header_cells):
            padded = self._pad_cell(cell, col_widths[i], 'left')
            header_line += f" {padded} {self.TABLE_V}"
        result.append(header_line)

        # Separator row - clean horizontal lines without alignment markers
        sep_line = self.TABLE_ML
        for i, w in enumerate(col_widths):
            cell_width = w + 2
            sep_line += self.TABLE_H * cell_width

            if i < len(col_widths) - 1: sep_line += self.TABLE_MM
            else:                       sep_line += self.TABLE_MR

        result.append(self._escape_literals(sep_line))

        # Data rows (with alignment)
        for row in data_rows:
            row_line = self.TABLE_V
            for i, cell in enumerate(row):
                padded = self._pad_cell(cell, col_widths[i], alignments[i])
                row_line += f" {padded} {self.TABLE_V}"

            result.append(row_line)

        # Bottom border
        border = self.TABLE_BL
        for i, w in enumerate(col_widths):
            border += self.TABLE_H * (w + 2)
            if i < len(col_widths) - 1: border += self.TABLE_BM
            else:                       border += self.TABLE_BR

        result.append(self._escape_literals(border))

        # End alignment
        if align: result.append("`a")

        return result

    def _parse_table_row(self, line):
        line = line.strip()
        if line.startswith('|'): line = line[1:]
        if line.endswith('|'):   line = line[:-1]

        cells = []
        current = ""
        escaped = False
        for char in line:
            if escaped:
                current += char
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '|':
                cells.append(current.strip())
                current = ""
            else:
                current += char

        cells.append(current.strip())
        return cells

    def _parse_table_alignments(self, line):
        cells = self._parse_table_row(line)
        alignments = []
        for cell in cells:
            cell = cell.strip()
            if cell.startswith(':') and cell.endswith(':'): alignments.append('center')
            elif cell.endswith(':'):                        alignments.append('right')
            else:                                           alignments.append('left')

        return alignments

    def _visible_width(self, text):
        text = re.sub(r'`[FB][0-9a-fA-F]{3}', '', text)
        text = re.sub(r'`[FB]T[0-9a-fA-F]{6}', '', text)
        text = re.sub(r'`[!*_=]', '', text)
        text = re.sub(r'`f`b', '', text)
        text = re.sub(r'`f', '', text)
        text = re.sub(r'`b', '', text)
        return self.display_width(text)

    def _pad_cell(self, text, width, align):
        text = self._truncate_cell(text, width)
        text_width = self._visible_width(text)
        padding = width - text_width

        if align == 'right':
            return " " * padding + text
        elif align == 'center':
            left = padding // 2
            right = padding - left
            return " " * left + text + " " * right
        else:
            return text + " " * padding

    def _truncate_cell(self, text, width):
        if self._visible_width(text) <= width: return text

        stripped = text
        stripped = re.sub(r'`[FB][0-9a-fA-F]{3}', '', stripped)
        stripped = re.sub(r'`[!*_]', '', stripped)
        stripped = re.sub(r'`f`b', '', stripped)

        if len(stripped) <= width - 1: return text

        truncated = stripped[:width - 1] + "…"
        return truncated

    def _wrap_text(self, text, width):
        if not text: return [""]

        words = text.split(' ')
        lines = []
        current_line = ""
        current_width = 0

        for word in words:
            if not word: continue

            word_width = self._visible_width(word)

            # Check if word alone exceeds width to force break it
            if word_width > width:
                if current_line:
                    lines.append(current_line)
                    current_line = ""
                    current_width = 0

                # Force break the long word character by character
                remaining = word
                while remaining:
                    # Binary search for how many characters fit
                    low, high = 1, len(remaining)
                    fit_chars = 0

                    while low <= high:
                        mid = (low + high) // 2
                        test_substr = remaining[:mid]
                        test_width = self._visible_width(test_substr)

                        if test_width <= width:
                            fit_chars = mid
                            low = mid + 1
                        else:
                            high = mid - 1

                    if fit_chars == 0: fit_chars = 1 # Need to force progress

                    lines.append(remaining[:fit_chars])
                    remaining = remaining[fit_chars:]

                continue

            # Check if word fits on current line
            space_width = 1 if current_line else 0
            if current_width + space_width + word_width <= width:
                if current_line:
                    current_line += " " + word
                    current_width += space_width + word_width
                else:
                    current_line = word
                    current_width = word_width
            else:
                # Flush current line and start new one
                lines.append(current_line)
                current_line = word
                current_width = word_width

        # Don't forget the last line
        if current_line: lines.append(current_line)

        return lines if lines else [""]


def convert_markdown_to_micron(text):
    converter = MarkdownToMicron()
    return converter.format_block(text)