Improve code quality

This commit is contained in:
Augusto Gunsch 2021-11-15 11:44:50 -03:00
parent 84192b0604
commit 77671d960f
No known key found for this signature in database
GPG Key ID: F7EEFE29825C72DC
1 changed file with 214 additions and 146 deletions

View File

@ -26,64 +26,122 @@ import argparse
import re import re
import chardet import chardet
import os import os
import pathlib import copy
from pathlib import Path, PosixPath
class Time: def panic(message, code):
def __init__(self, time_str, file_name, line_number): print(message, file=sys.stderr)
sys.exit(code)
class TimeStamp:
def __init__(self, time_str):
parsed_time = time_str.split(':') parsed_time = time_str.split(':')
try:
h = int(parsed_time[0]) h = int(parsed_time[0])
m = int(parsed_time[1]) m = int(parsed_time[1])
ms = int(parsed_time[2].replace(',', '')) ms = int(parsed_time[2].replace(',', ''))
# self.time: time in milliseconds
self.time = h * 3600000 + m * 60000 + ms self.time = h * 3600000 + m * 60000 + ms
except Exception:
print('Invalid time format detected ({}:{})'
.format(file_name, line_number),
file=sys.stderr)
sys.exit(1)
def add(self, ms): def getmilliseconds(self):
self.time += ms return self.time % 1000
def getseconds(self):
    """Return the whole-seconds component (0-59) of the time stamp.

    ``self.time`` holds the time stamp in milliseconds. Floor division is
    used so the result is an ``int``; true division produced fractional
    values (61500 ms gave 1.5) that only looked right because ``%02d``
    truncates them when formatting.
    """
    return (self.time % 60000) // 1000
def getminutes(self):
    """Return the whole-minutes component (0-59) of the time stamp.

    ``self.time`` holds the time stamp in milliseconds. Floor division
    keeps the result an ``int``; true division returned fractional
    minutes (90000 ms gave 1.5).
    """
    return (self.time // 60000) % 60
def gethours(self):
    """Return the whole-hours component of the time stamp.

    ``self.time`` holds the time stamp in milliseconds. Floor division
    keeps the result an ``int``; true division returned fractional hours
    (5400000 ms gave 1.5). For a negative time stamp the value is floored,
    which matches how the minutes and seconds components wrap.
    """
    return self.time // 3600000
# Read-only views of the individual components of the time stamp.
milliseconds = property(getmilliseconds)
# Misspelled original name, kept as an alias so existing readers
# (e.g. the formatting code that accesses ``self.millisecods``) keep working.
millisecods = milliseconds
seconds = property(getseconds)
minutes = property(getminutes)
hours = property(gethours)
def __iadd__(self, other):
    """Shift this time stamp in place and return it.

    ``other`` may be an integer number of milliseconds or another time
    stamp of the same class, whose ``time`` is added. ``isinstance`` is
    used instead of an exact type match so subclasses of either are
    accepted as well.

    Raises:
        TypeError: if ``other`` is neither an integer nor a time stamp.
    """
    if isinstance(other, int):
        self.time += other
    elif isinstance(other, type(self)):
        self.time += other.time
    else:
        raise TypeError(
            'unsupported operand type for +=: {!r}'
            .format(type(other).__name__))
    return self
def __neg__(self):
    """Return a deep copy of this time stamp with the sign of ``time`` flipped.

    The original object is left unchanged.
    """
    negated = copy.deepcopy(self)
    negated.time = -negated.time
    return negated
def __isub__(self, other):
    """Subtract ``other`` in place by adding its negation via ``__iadd__``."""
    negated = -other
    return self.__iadd__(negated)
# Rich comparisons order time stamps by their millisecond value.
# ``other`` may be another time stamp (its ``time`` attribute is used) or a
# plain number of milliseconds. The latter is needed because the subtitle
# filter compares a time stamp against the integer 0, which previously
# raised AttributeError.
def __lt__(self, other):
    """Return True if this time stamp is earlier than ``other``."""
    return self.time < getattr(other, 'time', other)

def __le__(self, other):
    """Return True if this time stamp is earlier than or equal to ``other``."""
    return self.time <= getattr(other, 'time', other)

def __eq__(self, other):
    """Return True if this time stamp and ``other`` denote the same time."""
    return self.time == getattr(other, 'time', other)

def __gt__(self, other):
    """Return True if this time stamp is later than ``other``."""
    return self.time > getattr(other, 'time', other)

def __ge__(self, other):
    """Return True if this time stamp is later than or equal to ``other``."""
    return self.time >= getattr(other, 'time', other)
def __repr__(self): def __repr__(self):
ms = self.time % 1000 return '%02d:%02d:%02d,%03d' % \
s = (self.time % 60000) / 1000 (self.hours, self.minutes, self.seconds, self.millisecods)
m = (self.time / 60000) % 60
h = self.time / 3600000
return '%02d:%02d:%02d,%03d' % (h, m, s, ms)
class Subtitle: class Subtitle:
# Parse a single subtitle
def __init__(self, lines, file_name, line_number): def __init__(self, lines, file_name, line_number):
if type(lines) is str:
lines = lines.splitlines()
try: try:
# This is mostly ignored, as the subtitles are renumbered later # This is mostly ignored, as the subtitles are renumbered later
self.number = int(lines.pop(0)) self.number = int(lines.pop(0))
except Exception: except Exception:
print('Invalid line number detected ({}:{})' panic('Invalid line number detected ({}:{})'
.format(file_name, line_number), .format(file_name, line_number), 1)
file=sys.stderr)
sys.exit(1)
line_number += 1 line_number += 1
try: try:
time_span = lines.pop(0).split(' --> ') time_span = lines.pop(0).split(' --> ')
self.time_start = Time(time_span[0], file_name, line_number)
self.time_end = Time(time_span[1], file_name, line_number)
except Exception: except Exception:
print('Invalid time span format detected ({}:{})' panic('Invalid time span format detected ({}:{})'
.format(file_name, line_number), .format(file_name, line_number), 1)
file=sys.stderr)
sys.exit(1) try:
self.time_start = TimeStamp(time_span[0])
self.time_end = TimeStamp(time_span[1])
except Exception:
panic('Invalid time stamp detected ({}:{})'
.format(file_name, line_number), 1)
if self.time_start >= self.time_end:
panic('End time must be greater than start time ({}:{})'
.format(file_name, line_number), 1)
self.content = lines self.content = lines
def __len__(self):
    """Return the number of content lines plus two."""
    # NOTE(review): the extra two presumably stand for the number line and
    # the time-span line that precede the content -- confirm.
    extra_lines = 2
    return extra_lines + len(self.content)
def shift(self, ms): def shift(self, ms):
self.time_start.add(ms) self.time_start += ms
self.time_end.add(ms) self.time_end += ms
def replace(self, pattern, new_content):
    """Replace every match of ``pattern`` in each content line.

    Args:
        pattern: a compiled regular expression.
        new_content: replacement text passed to ``pattern.sub``.

    Compiled patterns have no ``replace`` method, so substitution goes
    through ``sub``. The result is stored back into ``self.content``,
    because rebinding the loop variable never changed the stored lines.
    """
    self.content = [pattern.sub(new_content, line) for line in self.content]
def matches(self, regexp): def matches(self, regexp):
for line in self.content: for line in self.content:
@ -99,31 +157,46 @@ class Subtitle:
) )
def clean(subs, expressions): class ConfigFile:
# Cancel if no expression def __init__(self, args):
if len(expressions) == 0: # No reason to continue
return subs if not args.clean:
if args.config_file:
args.config_file.close()
self.expressions = []
return
# Remove lines matching any expression file = args.config_file
for regexp in expressions: # Set default config file if not specified
subs = filter(lambda sub: not sub.matches(regexp), subs) if not file:
home = Path.home()
return list(subs) if type(home) is PosixPath:
self.file_path = home / '.config' / 'fsubrc'
else:
self.file_path = Path(os.getenv('APPDATA')) / 'fsubrc'
try:
self.file_path.touch()
file = self.file_path.open(mode='r')
except PermissionError:
panic('Can\'t access file {}: Permission denied'
.format(self.file_path), 1)
else:
self.file_path = Path(file.name)
# Read expressions
lines = file.read().strip().splitlines()
file.close()
self.expressions = list(map(re.compile, lines))
def shift(subs, ms): class SubripFile:
for sub in subs: def read_file(file):
sub.shift(ms) # Check extension
return list(filter(lambda sub: sub.time_start.time >= 0, subs)) if file.name[-4:] != '.srt':
panic('File {} is not a SubRip file'.format(file.name), 1)
def strip_html(subs):
for sub in subs:
for i in range(0, len(sub.content)):
sub.content[i] = re.sub('<.+?>', '', sub.content[i])
def process_file(args, file, expressions):
# Read the input file # Read the input file
contents = file.read() contents = file.read()
file.close() file.close()
@ -131,82 +204,79 @@ def process_file(args, file, expressions):
# Decode the file contents # Decode the file contents
encoding = chardet.detect(contents)['encoding'] encoding = chardet.detect(contents)['encoding']
if encoding is None: if encoding is None:
print('Corrupt or empty file ({})'.format(file.name), panic('Corrupt or empty file ({})'.format(file.name), 1)
file=sys.stderr) return contents.decode(encoding)
sys.exit(1)
contents = contents.decode(encoding) # This method parses the file
def __init__(self, file):
self.file_name = file.name
contents = SubripFile.read_file(file)
# Count empty lines at the beginning # Count empty lines at the beginning
r = re.compile(r'\r?\n')
line_number = 1 line_number = 1
for line in r.split(contents): for line in contents.splitlines():
if len(line) == 0 or line.isspace(): if len(line) == 0 or line.isspace():
line_number += 1 line_number += 1
else: else:
break break
# Split subtitles on empty lines # Split subtitles on empty lines
subs = re.split(r'(?:\r?\n){2}', contents.strip()) chunks = re.split(r'(?:\r?\n){2}', contents.strip())
# Create Subtitle objects # Create Subtitle objects
subs_objs = [] self.subs = []
for sub in subs: for lines in chunks:
lines = list(r.split(sub)) sub = Subtitle(lines, self.file_name, line_number)
subs_objs.append(Subtitle(lines, file.name, line_number)) self.subs.append(sub)
line_number += len(lines) + 3 line_number += len(sub) + 1
# Clean if --clean is passed def clean(self, expressions):
if args.clean: if len(expressions) == 0:
subs_objs = clean(subs_objs, expressions) return
# Shift if --shift is passed # Remove lines matching any expression
if args.shift: for regexp in expressions:
subs_objs = shift(subs_objs, args.shift) subs = filter(lambda sub: not sub.matches(regexp), self.subs)
# Strip HTML if --no-html is passed self.subs = list(subs)
if args.no_html:
strip_html(subs_objs)
# Fix numbering def shift(self, ms):
for sub in self.subs:
sub.shift(ms)
self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
def strip_html(self):
    """Ask every subtitle to replace matches of ``<.+?>`` with nothing."""
    # Compiled once and shared by all subtitles.
    html_tag = re.compile('<.+?>')
    for subtitle in self.subs:
        subtitle.replace(html_tag, '')
def renumber(self):
i = 1 i = 1
for sub in subs_objs: for sub in self.subs:
sub.number = i sub.number = i
i += 1 i += 1
# Join Subtitle objects back to a string def process(self, args, config):
contents = '\n\n'.join(map(repr, subs_objs)) if args.clean:
self.clean(config.expressions)
# Write output if args.shift:
output = open(file.name, 'w', encoding='utf-8') self.shift(args.shift)
output.write(contents)
if args.no_html:
self.strip_html()
self.renumber()
self.write_file()
def write_file(self):
output = open(self.file_name, 'w', encoding='utf-8')
output.write(repr(self))
output.write('\n') output.write('\n')
output.close() output.close()
def __repr__(self):
def read_expressions(args): return '\n\n'.join(map(repr, self.subs))
if args.clean:
cfg = args.config_file
# Open default config file if not specified
if not args.config_file:
home = pathlib.Path.home()
try:
if type(home) is pathlib.PosixPath:
cfg = open(str(home) + '/.config/fsubrc', 'r')
elif type(home) is pathlib.WindowsPath:
cfg = open(os.getenv('APPDATA') + r'\fsubrc', 'r')
else:
print('Unsupported operating system', file=sys.stderr)
sys.exit(1)
except FileNotFoundError:
return []
# Read expressions
lines = re.split(r'\r?\n', cfg.read().strip())
expressions = list(map(re.compile, lines))
cfg.close()
return expressions
return []
def main(): def main():
@ -264,20 +334,18 @@ def main():
# Validate options # Validate options
if not args.clean and args.config_file: if not args.clean and args.config_file:
print('-f requires -c', file=sys.stderr) panic('-f requires -c', 1)
exit(1)
# Check if all files are .srt config = ConfigFile(args)
parsed_files = []
for file in args.files: for file in args.files:
if file.name[-4:] != '.srt': parsed_files.append(SubripFile(file))
print('File {} is not a SubRip file'.format(file.name),
file=sys.stderr)
sys.exit(1)
expressions = read_expressions(args) # TODO: join, split files
for file in args.files: for file in parsed_files:
process_file(args, file, expressions) file.process(args, config)
if __name__ == '__main__': if __name__ == '__main__':