cabinet/generate.py

340 lines
9.7 KiB
Python
Raw Normal View History

2022-02-14 12:01:16 -05:00
#!/bin/env python3.9
import re
from datetime import date
from glob import glob
from html import escape
from html.parser import HTMLParser
from os import makedirs, environ
from os.path import relpath
from pathlib import Path
from re import findall
from shutil import copy, copytree, rmtree
from subprocess import run
from sys import argv, stderr

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name
2022-02-14 12:01:16 -05:00
# Author credited when a document does not declare one itself.
base_author = 'Augusto Gunsch'

# Directory layout; every path is relative to the current working directory.
input_root = Path('input')
output_root = Path('output')
file_output_root = output_root / 'files'
templates_root = Path('templates')
static_root = Path('static')
# Command-line handling.  The only accepted argument is the optional word
# "clean", which removes the whole output tree so everything is regenerated.
# Only argv[1:] is inspected, so the script's own path is never mistaken for
# an argument, and any other word is rejected even when "clean" is present.
cli_args = argv[1:]
if any(arg != 'clean' for arg in cli_args):
    print('usage: {} [clean]'.format(argv[0]), file=stderr)
    # SystemExit rather than the exit() helper: exit() is injected by the
    # `site` module and does not exist under `python -S` or in frozen builds.
    raise SystemExit(1)

if 'clean' in cli_args:
    print('Cleaning output root')
    rmtree(output_root, ignore_errors=True)
# Load every "*.html" file in templates_root once, keyed by file stem
# ("article.html" -> "article").  The text is decoded explicitly as UTF-8 so
# the result does not depend on the machine's locale.
# NOTE(review): assumes the template files are UTF-8 encoded - confirm.
templates = {
    template_path.stem: template_path.read_text(encoding='utf-8')
    for template_path in templates_root.glob('*.html')
}
def render_template(template, **kwargs):
    """Return *template* with every ``${name}`` placeholder filled in.

    Each keyword argument ``name=value`` replaces all occurrences of
    ``${name}`` with ``str(value)``.  Placeholders that have no matching
    keyword argument are left in the text unchanged.

    The substitution is done in a single pass over the template, so text that
    was inserted for one placeholder is never scanned again: a value that
    happens to contain ``${other}`` is emitted literally instead of being
    expanded by a later keyword.
    """
    if not kwargs:
        return template

    # One alternation of the exact placeholder spellings; re.escape keeps
    # "$", "{" and "}" literal.
    placeholder = re.compile(
        '|'.join(re.escape('${%s}' % var) for var in kwargs)
    )
    # A function replacement is used so backslashes in the value are not
    # interpreted as regex group references.
    return placeholder.sub(
        lambda match: str(kwargs[match.group(0)[2:-1]]),
        template,
    )
2022-01-16 15:58:46 -05:00
class CodeHighlighter(HTMLParser):
    """Re-emit an HTML document with every ``<pre>`` block highlighted.

    Markup outside ``<pre>`` is written back out as it was parsed.  Inside a
    ``<pre>`` element all tags are discarded and only the text is collected;
    when the ``</pre>`` arrives the collected text is passed to Pygments and
    its output (``HtmlFormatter(linenos=True)``) takes the place of the whole
    element.  The lexer is chosen from the ``class`` attribute of a ``<code>``
    tag found inside the ``<pre>``.

    Usage: ``feed()`` a document, then call ``output()`` to collect the result.
    """

    # Per-document state.  These class-level defaults are immutable values, so
    # sharing them between instances until first assignment is safe.
    data = ''             # HTML emitted so far
    reading_code = False  # True while inside <pre> ... </pre>
    code = ''             # text collected from the current <pre>
    lang = ''             # lexer alias for the current <pre>
    in_raw_text = False   # True inside <script>/<style>, whose text is raw

    def output(self):
        """Finish the current document and return the HTML produced for it.

        ``close()`` is called first because the parser holds back trailing
        text until it knows the input has ended; without it that text would be
        missing here and would be prepended to the next document fed in.  All
        state is then cleared so the instance can be reused.
        """
        self.close()
        data = self.data
        self.reset()
        self.data = ''
        self.code = ''
        self.lang = ''
        self.reading_code = False
        self.in_raw_text = False
        return data

    @staticmethod
    def _format_attrs(attrs):
        """Serialise parsed ``(name, value)`` pairs back into tag attributes."""
        parts = []
        for name, value in attrs:
            if value is None:
                # Attribute written without a value, e.g. <input disabled>.
                parts.append(' %s' % name)
            else:
                # The parser hands over decoded values; encode them again.
                parts.append(' %s="%s"' % (name, escape(value, quote=True)))
        return ''.join(parts)

    @staticmethod
    def _language_from(attrs):
        """Return the lexer alias named by a ``class`` attribute, or ``''``.

        With two or more class tokens the second one is used (the original
        rule); a single token is used as is.
        """
        for name, value in attrs:
            if name == 'class' and value:
                tokens = value.split()
                if len(tokens) > 1:
                    return tokens[1]
                if tokens:
                    return tokens[0]
        return ''

    def _lexer(self):
        """Return the lexer for ``self.lang``, or a plain-text lexer."""
        try:
            return get_lexer_by_name(self.lang)
        except ValueError:
            # Pygments reports an unknown or empty alias with ClassNotFound,
            # a ValueError subclass; fall back instead of aborting the build.
            return get_lexer_by_name('text')

    def handle_starttag(self, tag, attrs):
        if tag == 'pre':
            self.reading_code = True
        if self.reading_code:
            # Tags inside <pre> (including the <pre> itself) are dropped; only
            # the language hint on <code> is kept.
            if tag == 'code':
                self.lang = self._language_from(attrs) or self.lang
            return
        if tag in self.CDATA_CONTENT_ELEMENTS:
            self.in_raw_text = True
        self.data += '<%s%s>' % (tag, self._format_attrs(attrs))

    def handle_startendtag(self, tag, attrs):
        # Keep self-closing tags self-closing: the inherited default would
        # emit "<br></br>", and browsers treat a stray "</br>" as another <br>.
        if self.reading_code:
            return
        self.data += '<%s%s />' % (tag, self._format_attrs(attrs))

    def handle_data(self, data):
        if self.reading_code:
            self.code += data
        elif self.in_raw_text:
            # <script>/<style> content is delivered undecoded; pass it through.
            self.data += data
        else:
            # Ordinary text arrives with character references already decoded,
            # so "<", ">" and "&" must be encoded again.
            self.data += escape(data, quote=False)

    def handle_comment(self, data):
        if not self.reading_code:
            self.data += '<!--%s-->' % data

    def handle_decl(self, decl):
        # Preserves "<!DOCTYPE ...>".
        if not self.reading_code:
            self.data += '<!%s>' % decl

    def handle_endtag(self, tag):
        if tag == 'pre' and self.reading_code:
            self.reading_code = False
            self.data += highlight(self.code,
                                   self._lexer(),
                                   HtmlFormatter(linenos=True))
            self.code = ''
            self.lang = ''
            return
        if self.reading_code:
            return
        if tag in self.CDATA_CONTENT_ELEMENTS:
            self.in_raw_text = False
        self.data += '</%s>' % tag


# Single shared parser instance; callers feed() a document and take output().
highlighter = CodeHighlighter()
2022-01-14 18:07:31 -05:00
2022-02-14 12:01:16 -05:00
class TeXFile:
    r"""A LaTeX source file together with metadata scraped from its text.

    Attributes set by ``__init__``:
        input_file: path of the ``.tex`` file, as passed in.
        breadcrumbs: ``input_file`` with the leading ``input_root`` components
            and the suffix removed.
        pretty_breadcrumbs: ``breadcrumbs`` as display text - components
            joined by " > ", underscores shown as spaces.
        raw_content: the file's text, decoded as UTF-8.
        mtime: modification time of ``input_file``.
        lang, title, author, date, document_class, biblatex: filled in by
            ``extract_tex_metadata``.
        content: ``raw_content`` after ``expand_macros``.
    """

    def __init__(self, input_file):
        self.input_file = input_file

        relative = Path(*input_file.parts[len(input_root.parts):])
        self.breadcrumbs = relative.with_suffix('')
        # Joined from path components rather than by replacing "/" in the
        # string form, so the result does not depend on the OS separator.
        self.pretty_breadcrumbs = ' > '.join(
            part.replace('_', ' ') for part in self.breadcrumbs.parts
        )

        # Decoded explicitly as UTF-8: the same text is later handed to
        # pandoc and pdflatex as UTF-8, whatever the locale is.
        self.raw_content = input_file.read_text(encoding='utf-8')

        self.mtime = input_file.stat().st_mtime
        self.extract_tex_metadata()
        self.expand_macros()

    def extract_tex_metadata(self):
        r"""Scrape language, title, author, date, class and biblatex use.

        Each value comes from the first matching command in ``raw_content``;
        when a command is absent a default is used: ``'english'``, the file
        stem with underscores as spaces, ``base_author``, today's date as
        ``dd/mm/YYYY``, and ``'article'`` respectively.  ``biblatex`` is True
        when the biblatex package is loaded.
        """
        source = self.raw_content

        # Optional arguments are matched with [^\]]* so a match cannot run
        # across several commands written on one line.
        m = findall(r'\\usepackage\[([^\]]*)\]\{babel\}', source)
        self.lang = m[0].strip() if m else 'english'

        m = findall(r'\\title\{(.*)\}', source)
        self.title = m[0] if m else self.input_file.stem.replace('_', ' ')

        m = findall(r'\\author\{(.*)\}', source)
        self.author = m[0] if m else base_author

        m = findall(r'\\date\{(.*)\}', source)
        self.date = m[0] if m else date.today().strftime('%d/%m/%Y')

        # \documentclass is usually written with options; skip them so that
        # "\documentclass[a4paper]{report}" is recognised as "report".
        m = findall(r'\\documentclass(?:\[[^\]]*\])?\{(.*)\}', source)
        self.document_class = m[0] if m else 'article'

        m = findall(r'\\usepackage(?:\[[^\]]*\])?\{biblatex\}', source)
        self.biblatex = bool(m)

    def expand_macros(self):
        r"""Set ``content`` to ``raw_content`` with two pseudo-macros expanded.

        ``\breadcrumbs`` becomes ``pretty_breadcrumbs`` with each ">" written
        as ``\textgreater\hspace{1pt}``; ``\outdir`` becomes the directory the
        generated files for this document are written to.
        """
        breadcrumbs = self.pretty_breadcrumbs.replace(
            '>', r'\textgreater\hspace{1pt}'
        )
        outdir = (file_output_root / self.breadcrumbs).parent

        content = self.raw_content.replace(r'\breadcrumbs', breadcrumbs)
        self.content = content.replace(r'\outdir', str(outdir))
2022-01-14 19:32:43 -05:00
2022-01-14 18:07:31 -05:00
2022-02-14 12:01:16 -05:00
class FromTeX:
    """Base for a file generated from a ``TeXFile``.

    Works out where the generated file lives under ``file_output_root`` and
    whether it is older than its source.

    Attributes:
        tex_file: the source document.
        output_file: target path, the source's breadcrumbs with *ext* appended.
        mtime: modification time of ``output_file``, or 0 when it is missing.
        is_outdated: True when the source is newer than ``output_file``.
    """

    def __init__(self, tex_file, ext):
        self.tex_file = tex_file

        relative_target = self.tex_file.breadcrumbs.with_suffix(ext)
        self.output_file = file_output_root / relative_target

        if self.output_file.exists():
            self.mtime = self.output_file.stat().st_mtime
        else:
            # A missing target counts as infinitely old.
            self.mtime = 0
        self.is_outdated = self.tex_file.mtime > self.mtime
2022-02-13 16:15:30 -05:00
2022-02-14 12:01:16 -05:00
class HtmlFile(FromTeX):
    """The HTML page generated from a ``TeXFile``."""

    def __init__(self, tex_file):
        super().__init__(tex_file, '.html')

    def write_output(self):
        """Convert the document with pandoc, wrap it in its template, write it.

        The template is selected by the document class
        (``templates[document_class]``).  Code blocks in the rendered page are
        highlighted by the shared ``highlighter`` before writing.

        Raises:
            SystemExit: with pandoc's return code when pandoc fails, or with
                code 2 when no template exists for the document class.
        """
        args = [
            'pandoc',
            '--mathjax=static/mathjax/es5/tex-mml-chtml.js',
            '-f', 'latex',
            '-t', 'html',
            '-'
        ]
        proc = run(args,
                   input=self.tex_file.content,
                   encoding='utf-8',
                   capture_output=True)
        if proc.returncode != 0:
            print(proc.stderr, file=stderr)
            raise SystemExit(proc.returncode)
        body = proc.stdout

        document_class = self.tex_file.document_class
        try:
            template = templates[document_class]
        except KeyError:
            # Only a missing key means "no such template"; anything else is a
            # real error and must not be swallowed.
            print('No template named "{}.html"'.format(document_class),
                  file=stderr)
            raise SystemExit(2) from None

        # Relative path from the page's own directory back to output_root.
        root = Path(relpath(output_root, start=self.output_file.parent))

        if self.tex_file.lang == 'portuguese':
            labels = {
                'lang_title': 'Título',
                'lang_author': 'Autor',
                'lang_date': 'Data da Ficha',
            }
        else:
            labels = {
                'lang_title': 'Title',
                'lang_author': 'Author',
                'lang_date': 'Report Date',
            }

        content = render_template(template,
                                  **labels,
                                  title=self.tex_file.title,
                                  date=self.tex_file.date,
                                  author=self.tex_file.author,
                                  breadcrumbs=self.tex_file.pretty_breadcrumbs,
                                  pdf=self.output_file.with_suffix('.pdf').name,
                                  root=root,
                                  body=body)

        highlighter.feed(content)
        # The parser buffers trailing text until told the input has ended;
        # close() flushes it so it neither goes missing from this page nor
        # leaks into the next one.
        highlighter.close()
        content = highlighter.output()

        makedirs(self.output_file.parent, exist_ok=True)
        # Explicit UTF-8: the page can contain non-ASCII text and must not
        # depend on the locale encoding.
        with open(self.output_file, 'w', encoding='utf-8') as f:
            f.write(content)
class PdfFile(FromTeX):
    """The PDF generated from a ``TeXFile`` with pdflatex (and biber)."""

    def __init__(self, tex_file):
        super().__init__(tex_file, '.pdf')

    @staticmethod
    def _run_or_exit(args, **kwargs):
        """Run *args* with captured output; on failure report it and exit.

        Raises:
            SystemExit: with the command's return code when it is non-zero.
        """
        proc = run(args, capture_output=True, **kwargs)
        if proc.returncode != 0:
            # Output is captured as bytes; decode it so the log is printed as
            # readable text instead of a b'...' repr.
            for stream in (proc.stdout, proc.stderr):
                print(stream.decode('utf-8', errors='replace'), file=stderr)
            raise SystemExit(proc.returncode)

    def run_pdflatex(self):
        """Run pdflatex once, feeding the expanded source on stdin."""
        args = [
            'pdflatex',
            '-jobname', self.output_file.stem,
            '-output-directory', self.output_file.parent,
            '-shell-escape'
        ]
        env = {
            **environ,
            'TEXINPUTS': './include:'
        }
        self._run_or_exit(args,
                          env=env,
                          input=self.tex_file.content.encode('utf-8'))

    def run_biber(self):
        """Run biber on the job (the output path without its suffix)."""
        args = [
            'biber',
            self.output_file.with_suffix('')
        ]
        self._run_or_exit(args)

    def write_output(self):
        """Build the PDF; with biblatex, run biber and pdflatex once more."""
        makedirs(self.output_file.parent, exist_ok=True)
        self.run_pdflatex()
        if self.tex_file.biblatex:
            self.run_biber()
            self.run_pdflatex()
2022-01-14 18:07:31 -05:00
2022-02-14 12:01:16 -05:00
def write_files():
    """Regenerate the HTML and PDF of every outdated ``.tex`` file.

    Walks ``input_root`` recursively.  For each source both targets are
    created first, then each outdated one is rebuilt, HTML before PDF.

    Returns:
        True when at least one file was generated, otherwise False.
    """
    changed = False

    for input_file in input_root.glob('**/*.tex'):
        tex_file = TeXFile(input_file)
        targets = (HtmlFile(tex_file), PdfFile(tex_file))

        for target in targets:
            if not target.is_outdated:
                continue
            print('Generating "{}"'.format(target.output_file))
            target.write_output()
            changed = True

    return changed
2022-01-14 18:07:31 -05:00
2022-02-14 12:01:16 -05:00
def copy_static_files():
    """Copy each top-level entry of ``static_root`` into ``output_root``.

    ``output_root`` is created when missing.  An entry whose destination
    already exists is skipped, so existing copies are never overwritten.
    """
    if not output_root.exists():
        makedirs(output_root)

    for entity in static_root.iterdir():
        # Entries are direct children of static_root, so the part of the path
        # below static_root is just the entry's name.
        dest = output_root / entity.name
        if dest.exists():
            continue

        print('Copying "{}" to "{}"'.format(entity, dest))
        duplicate = copytree if entity.is_dir() else copy
        duplicate(entity, dest)
2022-01-14 18:07:31 -05:00
2022-02-14 12:01:16 -05:00
def make_details(directory):
    """Return the table-of-contents HTML for *directory*, recursively.

    The result is a ``<ul>`` holding one ``<li><a>`` per ``.tex`` file,
    linking to its page under ``files/``, plus the nested output for each
    subdirectory.  Every directory except ``input_root`` is wrapped in an open
    ``<details>`` element whose summary is the directory name.  Underscores in
    names are shown as spaces.

    Entries are visited in sorted order so the index is identical from run to
    run, and names are HTML-escaped before being inserted.
    """
    is_root = directory == input_root
    root_depth = len(input_root.resolve().parts)
    parts = []

    if not is_root:
        parts.append('<details open>')
        parts.append('<summary>{}</summary>'.format(
            escape(directory.name.replace('_', ' '), quote=False)))

    parts.append('<ul>')
    # iterdir() yields entries in arbitrary order; sort for a stable index.
    for entry in sorted(directory.iterdir()):
        if entry.is_file():
            if entry.suffix == '.tex':
                relative = Path(*entry.resolve().parts[root_depth:])
                outfile = ('files' / relative).with_suffix('.html')
                parts.append('<li><a href="{}">{}</a></li>'.format(
                    # as_posix(): links use "/" whatever the OS separator is.
                    escape(outfile.as_posix(), quote=True),
                    escape(entry.stem.replace('_', ' '), quote=False)))
        else:
            parts.append(make_details(entry))
    parts.append('</ul>')

    if not is_root:
        parts.append('</details>')

    return ''.join(parts)
2022-01-14 18:22:15 -05:00
2022-02-14 12:01:16 -05:00
def make_index():
    """Render the ``index`` template with the table of contents and write it.

    The table of contents is the output of ``make_details(input_root)``
    wrapped in ``<ul id="toc">``; the page is written to ``index.html`` inside
    ``output_root``.
    """
    toc = '<ul id="toc">' + make_details(input_root) + '</ul>'

    index = render_template(templates['index'],
                            toc=toc)

    # Explicit UTF-8 so non-ASCII document names do not depend on the locale.
    with open(output_root / 'index.html', 'w', encoding='utf-8') as f:
        f.write(index)
2022-01-14 18:22:15 -05:00
2022-02-14 12:01:16 -05:00
# Script body: copy the static assets, rebuild whatever is stale, and refresh
# the index only when at least one page or PDF was regenerated.
copy_static_files()

if (outdated_index := write_files()):
    print('Generating index')
    make_index()