#!/bin/env python3.9
"""Build a static site from LaTeX sources.

Every ``*.tex`` file found under ``input/`` is rendered twice: to HTML
(through ``pandoc`` plus one of the templates in ``templates/``) and to PDF
(through ``pdflatex``).  Results are written under ``output/files/``, the
contents of ``static/`` are copied into ``output/``, and ``output/index.html``
is regenerated whenever at least one output file was rebuilt.

Usage: ``<script> [clean]`` -- ``clean`` removes the output root before
building.
"""
from subprocess import run
from datetime import date
from os import makedirs, environ
from os.path import relpath
from re import findall
from glob import glob
from pathlib import Path
from sys import argv, exit, stderr
from shutil import copy, copytree, rmtree
from html.parser import HTMLParser

from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound

# Author used when a document has no \author{...} command.
base_author = 'Augusto Gunsch'

input_root = Path('input')
output_root = Path('output')
file_output_root = output_root / Path('files')
templates_root = Path('templates')
static_root = Path('static')

# The only accepted command-line argument is "clean".
if len(argv) > 1 and 'clean' not in argv:
    print('usage: {} [clean]'.format(argv[0]), file=stderr)
    exit(1)

if 'clean' in argv:
    print('Cleaning output root')
    rmtree(output_root, ignore_errors=True)

# Template name (file stem) -> template text.
templates = {
    template.stem: template.read_text(encoding='utf-8')
    for template in templates_root.glob('*.html')
}


def render_template(template, **kwargs):
    """Return *template* with every ``${name}`` placeholder substituted.

    Each keyword argument ``name=value`` replaces all occurrences of
    ``${name}`` with ``str(value)``.  Placeholders without a matching
    keyword are left untouched.
    """
    for var, val in kwargs.items():
        template = template.replace('${%s}' % var, str(val))
    return template


class CodeHighlighter(HTMLParser):
    """Re-emit an HTML document, replacing ``<pre>`` blocks with Pygments output.

    Everything outside ``<pre>`` is copied to an internal buffer tag by tag.
    Inside ``<pre>`` the tags are dropped and only the text is collected; when
    the ``</pre>`` arrives the collected text is highlighted and appended to
    the buffer.  Call :meth:`output` to fetch and clear the buffer.

    NOTE(review): ``HTMLParser`` defaults to ``convert_charrefs=True``, so
    character references in text and attribute values arrive already decoded
    and are written back without re-escaping here.  Doctype declarations and
    comments are not forwarded either, because no handler is defined for
    them.  Confirm this is acceptable for the templates in use.
    """

    data = ''             # re-emitted HTML accumulated so far
    reading_code = False  # True while between <pre> and </pre>
    code = ''             # text collected inside the current <pre>
    lang = ''             # language name taken from the <code class="..."> attribute

    def output(self):
        """Return the accumulated HTML and reset the buffer."""
        data = self.data
        self.data = ''
        return data

    def handle_starttag(self, tag, attrs):
        """Copy a start tag to the buffer, or start/inspect a code block."""
        if tag == 'pre':
            self.reading_code = True
        if tag == 'code':
            for name, value in attrs:
                if name == 'class':
                    self.lang = self._language_from_class(value)
        if not self.reading_code:
            self.attrs = attrs
            self.data += '<' + tag
            for attr in attrs:
                self.data += ' %s="%s"' % (attr[0], attr[1])
            self.data += '>'

    def handle_data(self, data):
        """Route text either to the pending code block or to the buffer."""
        if self.reading_code:
            self.code += data
        else:
            self.data += data

    def handle_endtag(self, tag):
        """Copy an end tag to the buffer, or flush the highlighted code block."""
        if not self.reading_code:
            self.data += '</%s>' % tag
        if tag == 'pre':
            self.reading_code = False
            self.data += highlight(self.code,
                                   self._lexer_for(self.lang),
                                   HtmlFormatter(linenos=True))
            self.code = ''
            self.lang = ''

    @staticmethod
    def _language_from_class(value):
        """Pick the language name out of a ``class`` attribute value.

        The second class name is used when there is one; a lone class name is
        used as-is, and a missing or empty value yields ``''``.
        """
        names = (value or '').split()
        if len(names) > 1:
            return names[1]
        return names[0] if names else ''

    @staticmethod
    def _lexer_for(lang):
        """Return the Pygments lexer for *lang*, or the plain-text lexer."""
        try:
            return get_lexer_by_name(lang or 'text')
        except ClassNotFound:
            # Unknown language: render the block without highlighting
            # instead of aborting the whole build.
            return get_lexer_by_name('text')


# Single parser instance shared by every HtmlFile.
highlighter = CodeHighlighter()


class TeXFile:
    """A LaTeX source file together with the metadata extracted from it."""

    def __init__(self, input_file):
        """Read *input_file* (a ``Path`` under ``input_root``) and parse it."""
        self.input_file = input_file
        # Path relative to input_root, without the ".tex" suffix.
        self.breadcrumbs = Path(
            *input_file.parts[len(input_root.parts):]).with_suffix('')
        # Human-readable form: "dir > sub dir > file name".
        self.pretty_breadcrumbs = ' > '.join(
            part.replace('_', ' ') for part in self.breadcrumbs.parts)

        with open(input_file, 'r', encoding='utf-8') as f:
            self.raw_content = f.read()

        self.mtime = input_file.stat().st_mtime

        self.extract_tex_metadata()
        self.expand_macros()

    def _first_match(self, pattern, default):
        """Return the first capture of *pattern* in the source, or *default*."""
        found = findall(pattern, self.raw_content)
        return found[0] if found else default

    def extract_tex_metadata(self):
        """Set ``lang``, ``title``, ``author``, ``date`` and ``document_class``.

        Each value comes from the first matching LaTeX command in the source;
        when the command is absent a default is used instead.
        """
        self.lang = self._first_match(
            r'\\usepackage\[(.*)\]\{babel\}', 'english')
        self.title = self._first_match(
            r'\\title\{(.*)\}', self.input_file.stem.replace('_', ' '))
        self.author = self._first_match(r'\\author\{(.*)\}', base_author)
        self.date = self._first_match(
            r'\\date\{(.*)\}', date.today().strftime('%d/%m/%Y'))
        self.document_class = self._first_match(
            r'\\documentclass\{(.*)\}', 'article')

    def expand_macros(self):
        r"""Set ``content`` to the source with the custom macros substituted.

        ``\breadcrumbs`` becomes the pretty breadcrumbs (with ``>`` turned into
        a LaTeX ``\textgreater``) and ``\outdir`` becomes the directory the
        outputs of this file are written to.
        """
        content = self.raw_content

        breadcrumbs = str(self.pretty_breadcrumbs).replace(
            '>', r'\textgreater\hspace{1pt}')
        content = content.replace(r'\breadcrumbs', breadcrumbs)

        outdir = (file_output_root / self.breadcrumbs).parent
        content = content.replace(r'\outdir', str(outdir))

        self.content = content


class FromTeX:
    """Base class for an output file generated from a :class:`TeXFile`."""

    def __init__(self, tex_file, ext):
        """Locate the output for *tex_file* with suffix *ext* and check staleness."""
        self.tex_file = tex_file
        self.output_file = (
            file_output_root / self.tex_file.breadcrumbs.with_suffix(ext))
        # A missing output gets mtime 0 so that it always counts as outdated.
        self.mtime = (self.output_file.stat().st_mtime
                      if self.output_file.exists() else 0)
        self.is_outdated = self.mtime < self.tex_file.mtime


class HtmlFile(FromTeX):
    """HTML rendering of a :class:`TeXFile`."""

    def __init__(self, tex_file):
        super().__init__(tex_file, '.html')

    def write_output(self):
        """Convert the document with pandoc, apply its template and write it.

        Exits the process with pandoc's return code when the conversion
        fails, and with status 2 when no template matches the document class.
        """
        args = [
            'pandoc',
            '--mathjax=static/mathjax/es5/tex-mml-chtml.js',
            '-f', 'latex',
            '-t', 'html',
            '-',
        ]
        proc = run(args,
                   input=self.tex_file.content,
                   encoding='utf-8',
                   capture_output=True)

        if proc.returncode != 0:
            print(proc.stderr, file=stderr)
            exit(proc.returncode)

        body = proc.stdout

        try:
            template = templates[self.tex_file.document_class]
        except KeyError:
            print('No template named "{}.html"'.format(
                self.tex_file.document_class), file=stderr)
            exit(2)

        # Relative path from the directory of the output file back to
        # output_root, substituted for ${root} in the template.
        root = Path(relpath(output_root, start=self.output_file.parent))

        if self.tex_file.lang == 'portuguese':
            lang_title = 'Título'
            lang_author = 'Autor'
            lang_date = 'Data da Ficha'
        else:
            lang_title = 'Title'
            lang_author = 'Author'
            lang_date = 'Report Date'

        content = render_template(
            template,
            lang_title=lang_title,
            lang_author=lang_author,
            lang_date=lang_date,
            title=self.tex_file.title,
            date=self.tex_file.date,
            author=self.tex_file.author,
            breadcrumbs=self.tex_file.pretty_breadcrumbs,
            pdf=self.output_file.with_suffix('.pdf').name,
            root=root,
            body=body)

        highlighter.feed(content)
        # Flush anything the parser is still buffering, then make the shared
        # instance ready for the next document.
        highlighter.close()
        content = highlighter.output()
        highlighter.reset()

        makedirs(self.output_file.parent, exist_ok=True)

        # The content holds non-ASCII text, so do not rely on the locale.
        with open(self.output_file, 'w', encoding='utf-8') as f:
            f.write(content)


class PdfFile(FromTeX):
    """PDF rendering of a :class:`TeXFile`."""

    def __init__(self, tex_file):
        super().__init__(tex_file, '.pdf')

    def write_output(self):
        """Compile the document with pdflatex, feeding the source on stdin.

        Exits the process with pdflatex's return code when compilation fails.
        """
        parent_dir = self.output_file.parent
        makedirs(parent_dir, exist_ok=True)

        args = [
            'pdflatex',
            '-jobname', self.output_file.stem,
            '-output-directory', str(parent_dir),
            '-shell-escape',
        ]

        env = {
            **environ,
            'TEXINPUTS': './include:',
        }

        proc = run(args,
                   env=env,
                   input=bytes(self.tex_file.content, 'utf-8'),
                   capture_output=True)

        if proc.returncode != 0:
            # The captured streams are bytes; decode them so the log is
            # readable instead of a b'...' repr.
            print(proc.stdout.decode('utf-8', errors='replace'), file=stderr)
            print(proc.stderr.decode('utf-8', errors='replace'), file=stderr)
            exit(proc.returncode)


def write_files():
    """Regenerate every outdated HTML/PDF output.

    Returns True when at least one output file was (re)generated.
    """
    changed = False

    for input_file in input_root.glob('**/*.tex'):
        tex_file = TeXFile(input_file)
        for output in (HtmlFile(tex_file), PdfFile(tex_file)):
            if output.is_outdated:
                print('Generating "{}"'.format(output.output_file))
                output.write_output()
                changed = True

    return changed


def copy_static_files():
    """Copy each entry of ``static_root`` into ``output_root`` if absent there."""
    makedirs(output_root, exist_ok=True)

    for entity in static_root.iterdir():
        dest = output_root / Path(*entity.parts[len(static_root.parts):])
        if dest.exists():
            continue
        print('Copying "{}" to "{}"'.format(entity, dest))
        if entity.is_dir():
            copytree(entity, dest)
        else:
            copy(entity, dest)


def make_details(directory):
    """Return the index fragment for *directory*.

    NOTE(review): the string literals below look like they lost their HTML
    markup (presumably <details>/<summary> wrappers and a listing of the
    directory contents) -- restore them from the original source.  The
    nesting of the heading line under the first ``if`` is likewise an
    assumption to confirm.
    """
    html = ''
    if directory != input_root:
        html += '\n'
        html += '{}'.format(directory.name.replace('_', ' '))
    html += ''
    if directory != input_root:
        html += '\n'
    return html


def make_index():
    """Render the ``index`` template to ``output_root/index.html``.

    NOTE(review): the table of contents passed to the template is empty and
    ``make_details`` is never called here; markup appears to be missing from
    this function as well -- confirm against the original source.
    """
    html = ''

    index = render_template(templates['index'], toc=html)

    with open(output_root / 'index.html', 'w', encoding='utf-8') as f:
        f.write(index)


copy_static_files()
outdated_index = write_files()
if outdated_index:
    print('Generating index')
    make_index()