#!/usr/bin/env python3
"""Clean up generated Markdown files in place.

Walks a directory tree, and for every ``*.md`` file removes lines that
match the configured patterns (together with the blank lines that directly
follow them), normalises a few spacing quirks, and strips trailing blank
lines.  The file ``markdown/reference.md`` additionally gets its level-3
and level-4 headings wrapped in backticks.

NOTE(review): the copy of this file that was reviewed had lost its line
breaks, and text that looked like HTML tags had been stripped out of
several literals.  Every spot that had to be reconstructed is marked with
``NOTE(review)`` below; runs of whitespace inside string literals may also
have been collapsed.  Confirm these against version control.
"""

import os
import sys
import re
from pathlib import Path

# Regexes tested with ``re.match`` against the whitespace-stripped line.
LINE_START_PATTERNS = [
    # NOTE(review): one entry used to precede this one, but only its
    # opening ``r'`` survived in the reviewed copy.  It is deliberately NOT
    # guessed here (a wrong pattern would delete real content) -- restore
    # it from version control.
    r'\\\\newpage',  # LaTeX newpage commands
]

# Regexes tested with ``re.search`` anywhere in the whitespace-stripped line.
LINE_ANY_PATTERNS = [
    # All entries are disabled.  NOTE(review): their tag text was stripped
    # in the reviewed copy; the spellings below are a best-effort
    # reconstruction.
    # r'<br>',
    # r'<a[^>]*>',
    # r'</a>',
]

# NOTE(review): the literal removed from API headings was lost in the
# reviewed copy (it rendered as a line break).  It is presumably an HTML
# line-break tag; this matches <br>, <br/> and <br />.
_BR_TAG = re.compile(r'<br\s*/?>')

# Trailing path components that identify the API reference document.
_API_REFERENCE_PARTS = ("markdown", "reference.md")


def compile_patterns():
    """Compile the configured pattern lists.

    Returns:
        A ``(start_patterns, any_patterns)`` tuple of lists of compiled
        regexes built from ``LINE_START_PATTERNS`` and
        ``LINE_ANY_PATTERNS`` respectively.
    """
    start_patterns = [re.compile(p) for p in LINE_START_PATTERNS]
    any_patterns = [re.compile(p) for p in LINE_ANY_PATTERNS]
    return start_patterns, any_patterns


def should_remove_line(line, start_patterns, any_patterns):
    """Return True if *line* matches any removal pattern.

    The line is stripped of surrounding whitespace first.  *start_patterns*
    must match at the beginning of the stripped line; *any_patterns* may
    match anywhere in it.
    """
    stripped = line.strip()
    if any(pattern.match(stripped) for pattern in start_patterns):
        return True
    return any(pattern.search(stripped) for pattern in any_patterns)


def _format_api_heading(line):
    """Turn a ``###``/``####`` heading into a backtick-quoted code heading.

    Emphasis asterisks are dropped, ``\\_`` is unescaped to ``_``, and the
    heading text is wrapped in backticks.  A heading whose text is already
    wrapped in backticks is returned unchanged so that re-running the
    script does not add another pair.
    """
    _, _, title = line.partition(" ")
    if len(title) >= 2 and title.startswith("`") and title.endswith("`"):
        return line

    line = line.replace("*", "")
    line = line.replace("\\_", "_")
    if line.startswith("### "):
        line = line.replace("### ", "### `")
    if line.startswith("#### "):
        line = line.replace("#### ", "#### `")
    line = f"{line}`"
    # NOTE(review): the original nesting of this removal could not be
    # recovered; it is applied to API headings only (the narrowest
    # reading).  Widen the scope if it was meant for every line.
    return _BR_TAG.sub("", line)


def clean_markdown_content(content, start_patterns, any_patterns, api_ref=False):
    """Return a cleaned copy of the Markdown text *content*.

    Args:
        content: Full text of a Markdown document.
        start_patterns: Compiled regexes matched at the start of each
            stripped line; matching lines are dropped.
        any_patterns: Compiled regexes searched anywhere in each stripped
            line; matching lines are dropped.
        api_ref: When true, ``###`` and ``####`` headings are rewritten as
            backtick-quoted code headings.

    Returns:
        The cleaned text.  Blank lines that directly follow a dropped line
        are dropped too, and trailing blank lines (including the final
        newline) are removed.
    """
    content = content.replace("**\n : ", "**\n ")
    content = content.replace("\n* **", "\n\n* **")
    content = content.replace("\n\n\n", "\n\n")

    result = []
    skip_next_empty = False
    for line in content.split('\n'):
        if should_remove_line(line, start_patterns, any_patterns):
            # Also swallow the blank lines that trail a removed line.
            skip_next_empty = True
            continue

        if skip_next_empty:
            if line.strip() == '':
                continue
            skip_next_empty = False

        if api_ref and line.startswith(("### ", "#### ")):
            line = _format_api_heading(line)

        result.append(line)

    # Remove trailing empty lines from end of file
    while result and result[-1].strip() == '':
        result.pop()

    return '\n'.join(result)


def _is_api_reference(filepath):
    """Return True if *filepath* is ``reference.md`` inside ``markdown/``.

    Compares path components rather than the raw string so that absolute
    paths, a docs directory reached through a parent path, and
    platform-specific separators are all recognised.
    """
    return Path(filepath).parts[-2:] == _API_REFERENCE_PARTS


def process_file(filepath, start_patterns, any_patterns):
    """Clean one Markdown file in place.

    Args:
        filepath: Path of the file to clean (read and written as UTF-8).
        start_patterns: Compiled line-start removal patterns.
        any_patterns: Compiled anywhere-in-line removal patterns.

    Returns:
        True if the file was rewritten, False if it was already clean or
        could not be processed (the error is reported on stderr).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            original_content = f.read()

        cleaned_content = clean_markdown_content(
            original_content,
            start_patterns,
            any_patterns,
            api_ref=_is_api_reference(filepath),
        )

        if cleaned_content == original_content:
            return False

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(cleaned_content)
        return True
    except Exception as e:
        # Best effort per file: report the failure and let the run go on.
        print(f"Error processing {filepath}: {e}", file=sys.stderr)
        return False


def find_markdown_files(directory):
    """Return the paths of all ``*.md`` files under *directory*, recursively."""
    return [
        Path(root) / filename
        for root, _, files in os.walk(directory)
        for filename in files
        if filename.endswith('.md')
    ]


def main():
    """Command-line entry point: clean every Markdown file under ``argv[1]``."""
    if len(sys.argv) < 2:
        # NOTE(review): the argument placeholder was stripped from this
        # message in the reviewed copy; "<directory>" is a reconstruction.
        print("Usage: python clean_markdown.py <directory>", file=sys.stderr)
        sys.exit(1)

    directory = sys.argv[1]
    if not os.path.isdir(directory):
        print(f"Error: '{directory}' is not a valid directory", file=sys.stderr)
        sys.exit(1)

    start_patterns, any_patterns = compile_patterns()
    md_files = find_markdown_files(directory)

    if not md_files:
        print(f"No markdown files found in '{directory}'")
        return

    modified_count = 0
    for filepath in md_files:
        if process_file(filepath, start_patterns, any_patterns):
            print(f"Cleaned: {filepath}")
            modified_count += 1

    print(f"\nProcessed {len(md_files)} file(s), modified {modified_count}")


if __name__ == '__main__':
    main()