Files
Reticulum/RNS/Utilities/rngit/highlight.py
T
Mark Qvist 4d6e164d62 Cleanup
2026-05-06 21:12:34 +02:00

411 lines
16 KiB
Python

# Reticulum License
#
# Copyright (c) 2016-2026 Mark Qvist
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# - The Software shall not be used in any kind of system which includes amongst
# its functions the ability to purposefully do harm to human beings.
#
# - The Software shall not be used, directly or indirectly, in the creation of
# an artificial intelligence, machine learning or language model training
# dataset, including but not limited to any use that contributes to the
# training or development of such a model or algorithm.
#
# - The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os
import io
import RNS
class SyntaxHighlighter:
def __init__(self, theme=None):
self.pygments_available = False
self.pygments = None
self._lexer_cache = {}
self._check_pygments()
self.theme = theme or self._get_default_theme()
def _get_default_theme(self):
return {
# Control flow - warm coral-red
"keyword": "ff7b72",
"keyword_constant": "ff7b72",
"keyword_control": "ff7b72",
"keyword_declaration": "ff7b72",
# Function definitions - bright sky blue
"function_def": "79c0ff",
"function_magic": "ff7b72",
# Function calls - soft lavender
"function_call": "d2a8ff",
"function_builtin": "ffa657", # amber
# Class definitions - fresh mint green
"class_def": "7ee787",
"class_ref": "56d364", # muted when referenced
# Instance context - soft pink
"self": "ff9bce",
"cls": "ff9bce",
# Data literals - cool, calm ice blue
"string": "a5d6ff",
"string_quoted": "a5d6ff",
"string_doc": "8b949e", # docstrings - like comments
"string_interpol": "ffd700", # f-string braces - gold
"string_escape": "ffea00", # escape sequences - bright yellow
# Numbers - same as function def
"number": "79c0ff",
"number_float": "79c0ff",
"number_integer": "79c0ff",
"number_hex": "79c0ff",
# Comments - muted gray
"comment": "8b949e",
"comment_doc": "8b949e",
"comment_preproc": "ff7b72", # preprocessor directives
# Operators - distinct pink/red for visibility
"operator": "ff7b72", # General operators - coral
"operator_arithmetic": "ff7b72", # +, -, *, /, etc.
"operator_comparison": "ff7b72", # ==, !=, <, >, etc.
"operator_assignment": "ff7b72", # =, +=, -=, etc.
"operator_word": "ff7b72", # and, or, not, in, is
"operator_dot": "c9d1d9", # . - subtle for attribute access
# Punctuation - neutral
"punctuation": "b4b4b4",
"punctuation_brace": "b4b4b4", # [, ], {, }
"punctuation_paren": "b4b4b4", # (, )
"punctuation_colon": "b4b4b4", # :, ;
"punctuation_comma": "8b949e", # , - slightly dimmed
# Decorators - burnt orange
"decorator": "f0883e",
# Constants - same as keywords
"constant": "ff7b72",
"constant_builtin": "ff7b72", # True, False, None
# Type hints and annotations - amber
"type_hint": "ffa657",
"type_builtin": "ffa657",
# Exception handling - alert red
"exception": "f85149",
"exception_builtin": "f85149",
# Names and attributes - near-white for readability
"name": "e6edf3",
"attribute": "e6edf3",
"attribute_call": "d2a8ff", # Function/method calls after dot - lavender
"variable": "e6edf3",
"parameter": "e6edf3",
# Namespaces and modules
"namespace": "7ee787",
"module": "a5d6ff",
# Generic tokens
"generic_heading": "c9d1d9",
"generic_subheading": "c9d1d9",
"generic_prompt": "8b949e",
"generic_error": "f85149",
"generic_deleted": "f85149",
"generic_inserted": "7ee787",
"generic_output": "e6edf3",
# Text and whitespace - no color (None means no color tag)
"text": None,
"whitespace": None,
}
def _check_pygments(self):
try:
import pygments
from pygments.lexers import get_lexer_for_filename, guess_lexer, get_lexer_by_name
from pygments.formatter import Formatter
from pygments.token import Token
self.pygments = pygments
self.pygments_available = True
RNS.log("Pygments syntax highlighting available", RNS.LOG_DEBUG)
except ImportError:
self.pygments_available = False
RNS.log("Pygments not available, using plain text rendering", RNS.LOG_DEBUG)
def highlight(self, content, filename=None, language=None):
if not content: return self._plain_text(content)
if self.pygments_available:
try:
highlighted = self._highlight_pygments(content, filename, language)
# Fix pygments insisting on trailing newlines
if highlighted.endswith("\n") and not content.endswith("\n"): highlighted = highlighted[:-1]
return highlighted
except Exception as e:
RNS.log(f"Pygments highlighting failed, falling back: {e}", RNS.LOG_WARNING)
return self._plain_text(content)
# TODO: Implement Python tokenize fallback for .py files.
# For now, route to plain text
if filename and filename.endswith(".py"):
return self._plain_text(content)
# Universal fallback
return self._plain_text(content)
def _highlight_pygments(self, content, filename=None, language=None):
from pygments.lexers import get_lexer_for_filename, guess_lexer, get_lexer_by_name
from pygments.util import ClassNotFound
lexer = None
if language:
if language == "env": language = "bash"
if language == "environment": language = "bash"
try: lexer = get_lexer_by_name(language)
except ClassNotFound: pass
if lexer is None and filename:
try: lexer = get_lexer_for_filename(filename)
except ClassNotFound: pass
if lexer is None:
try:
if len(content) > 20: lexer = guess_lexer(content)
except ClassNotFound: pass
if lexer is None: return self._plain_text(content)
formatter = MicronFormatter(theme=self.theme)
result = self.pygments.highlight(content, lexer, formatter)
return result
def _plain_text(self, content):
escaped = self._escape_micron(content)
return f"`=\n{escaped}\n`="
@staticmethod
def _escape_micron(text): return text.replace("`", "\\`")
class MicronFormatter:
def __init__(self, theme, **options):
self.theme = theme
self.options = options
def format(self, tokensource, outfile):
output_parts = []
prev_was_dot = False
last_ended_with_break = True
for ttype, value in tokensource:
is_dot = (str(ttype) == "Token.Operator" and value == ".")
ends_with_break = value.endswith("\n")
# If previous token was a dot and this is a Name, treat as attribute/function call
# TODO: Improve this if we can check next token as parantheses or something.
if prev_was_dot and str(ttype).startswith("Token.Name") and value:
color = self._get_color_from_key("attribute_call")
if color:
escaped = self._escape_value(value)
output_parts.append(f"`FT{color}{escaped}`f")
else:
output_parts.append(self._escape_value(value))
else:
color_key = self._get_color_key_for_token(ttype)
color = self._get_color_from_key(color_key)
if color and value:
escaped = self._escape_value(value)
if escaped.startswith("\n"): ilb = "\n"; escaped = escaped[1:]
else: ilb = ""
if escaped.endswith("\n"): tlb = "\n"; escaped = escaped[:-1]
else: tlb = ""
if len(escaped): output = f"{ilb}`FT{color}{escaped}`f{tlb}"
else: output = f"{ilb}{tlb}"
output_parts.append(output)
else:
escaped = self._escape_value(value)
if "\n" in escaped:
parts = []
splitl = escaped.splitlines()
if len(splitl) > 1:
for line in splitl:
if line.startswith("-"): l = f"\\{line}"
elif line.startswith(">"): l = f"\\{line}"
elif line.startswith("<"): l = f"\\{line}"
else: l = line
parts.append(l)
trmpart = "\n" if escaped.endswith("\n") else ""
escaped = "\n".join(parts)+trmpart
elif last_ended_with_break:
if escaped.startswith("-"): escaped = f"\\{escaped}"
elif escaped.startswith(">"): escaped = f"\\{escaped}"
elif escaped.startswith("<"): escaped = f"\\{escaped}"
output_parts.append(escaped)
prev_was_dot = is_dot
last_ended_with_break = ends_with_break
output = "".join(output_parts)
outfile.write(output)
def _get_color_key_for_token(self, ttype):
token_parts = []
current = ttype
while current:
token_parts.insert(0, current[0] if isinstance(current, tuple) else str(current).split(".")[-1])
current = current.parent if hasattr(current, "parent") else None
token_str = ".".join(["Token"] + token_parts[1:] if len(token_parts) > 1 else token_parts)
current_type = ttype
while current_type:
token_key = str(current_type)
if token_key in granular_token_map: return granular_token_map[token_key]
# Move to parent
current_type = current_type.parent if hasattr(current_type, "parent") else None
return None
def _get_color_from_key(self, color_key):
if color_key and color_key in self.theme: return self.theme[color_key]
return None
@staticmethod
def _escape_value(value: str) -> str: return value.replace("`", "\\`")
# Required by Pygments formatter API, returns None for Micron
def get_style_defs(self, arg=None): return None
# Convenience function for direct use
def highlight_code(content: str, filename: str = None, language: str = None, theme=None) -> str:
highlighter = SyntaxHighlighter(theme=theme)
return highlighter.highlight(content, filename, language)
granular_token_map = {
# Keywords with semantic distinction
"Token.Keyword": "keyword",
"Token.Keyword.Constant": "keyword_constant",
"Token.Keyword.Declaration": "keyword_declaration",
"Token.Keyword.Namespace": "keyword_control",
"Token.Keyword.Pseudo": "keyword_control",
"Token.Keyword.Reserved": "keyword_control",
"Token.Keyword.Type": "type_builtin",
# Names - functions with definition vs call distinction
"Token.Name.Function": "function_call",
"Token.Name.Function.Magic": "function_magic",
"Token.Name.Class": "class_ref",
"Token.Name.Builtin": "function_builtin",
"Token.Name.Builtin.Pseudo": "constant_builtin",
"Token.Name.Exception": "exception_builtin",
"Token.Name.Decorator": "decorator",
"Token.Name.Namespace": "namespace",
"Token.Name.Attribute": "attribute",
"Token.Name.Variable": "variable",
"Token.Name.Variable.Magic": "function_magic",
"Token.Name.Other": "name",
"Token.Name": "name",
"Token.Name.Tag": "keyword", # HTML/XML tags
"Token.Name.Constant": "constant",
"Token.Name.Label": "name",
"Token.Name.Entity": "name",
# Literals - strings with detailed handling
"Token.Literal.String": "string",
"Token.Literal.String.Affix": "string", # f, r, b prefixes
"Token.Literal.String.Backtick": "string",
"Token.Literal.String.Char": "string",
"Token.Literal.String.Delimiter": "string",
"Token.Literal.String.Doc": "string_doc",
"Token.Literal.String.Double": "string_quoted",
"Token.Literal.String.Escape": "string_escape",
"Token.Literal.String.Heredoc": "string",
"Token.Literal.String.Interpol": "string_interpol",
"Token.Literal.String.Other": "string",
"Token.Literal.String.Regex": "string",
"Token.Literal.String.Single": "string_quoted",
"Token.Literal.String.Symbol": "string",
# Numbers
"Token.Literal.Number": "number",
"Token.Literal.Number.Bin": "number",
"Token.Literal.Number.Float": "number_float",
"Token.Literal.Number.Hex": "number_hex",
"Token.Literal.Number.Integer": "number_integer",
"Token.Literal.Number.Integer.Long": "number_integer",
"Token.Literal.Number.Oct": "number",
"Token.Literal": "string",
"Token.Literal.Date": "string",
# Operators - all operators get distinct coloring
"Token.Operator": "operator",
"Token.Operator.Word": "operator_word",
"Token.Operator.Comparison": "operator_comparison",
"Token.Operator.Assignment": "operator_assignment",
"Token.Operator.Arithmetic": "operator_arithmetic",
# Punctuation - braces, parens, colons, commas
"Token.Punctuation": "punctuation",
"Token.Punctuation.Marker": "punctuation",
"Token.Punctuation.Brace": "punctuation_brace",
"Token.Punctuation.Bracket": "punctuation_brace",
"Token.Punctuation.Parenthesis": "punctuation_paren",
"Token.Punctuation.Colon": "punctuation_colon",
"Token.Punctuation.Comma": "punctuation_comma",
# Comments
"Token.Comment": "comment",
"Token.Comment.Hashbang": "comment",
"Token.Comment.Multiline": "comment_doc",
"Token.Comment.Preproc": "comment_preproc",
"Token.Comment.Single": "comment",
"Token.Comment.Special": "comment",
# Generic tokens
"Token.Generic.Deleted": "generic_deleted",
"Token.Generic.Emph": "text",
"Token.Generic.Error": "generic_error",
"Token.Generic.Heading": "generic_heading",
"Token.Generic.Inserted": "generic_inserted",
"Token.Generic.Output": "generic_output",
"Token.Generic.Prompt": "generic_prompt",
"Token.Generic.Strong": "text",
"Token.Generic.Subheading": "generic_subheading",
"Token.Generic.Traceback": "generic_error",
"Token.Generic": "text",
# Text and whitespace
"Token.Text": "text",
"Token.Text.Whitespace": "whitespace",
}