Improve code quality
This commit is contained in:
parent
84192b0604
commit
77671d960f
294
src/fsub/fsub.py
294
src/fsub/fsub.py
|
@ -26,64 +26,122 @@ import argparse
|
||||||
import re
|
import re
|
||||||
import chardet
|
import chardet
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import copy
|
||||||
|
from pathlib import Path, PosixPath
|
||||||
|
|
||||||
|
|
||||||
class Time:
|
def panic(message, code=1):
    """Print an error message to standard error and terminate the program.

    Args:
        message: Text written to ``sys.stderr``.
        code: Exit status handed to ``sys.exit``. Defaults to 1.

    Raises:
        SystemExit: Always; this function never returns.
    """
    print(message, file=sys.stderr)
    sys.exit(code)
|
||||||
|
|
||||||
|
|
||||||
|
class TimeStamp:
    """A SubRip time stamp held as a number of milliseconds.

    Instances can be shifted in place with ``+=`` / ``-=`` (by an ``int``
    number of milliseconds or by another ``TimeStamp``), negated, and
    compared with other ``TimeStamp`` objects or plain ``int`` values.

    Attributes:
        time: Total number of milliseconds.
    """

    # Instances are mutable and compare by value, so they are not hashable.
    __hash__ = None

    def __init__(self, time_str):
        """Parse a stamp written as ``HH:MM:SS,mmm``.

        Args:
            time_str: The textual time stamp.

        Raises:
            IndexError: Fewer than three ':'-separated fields are present.
            ValueError: A field is not an integer.
        """
        parsed_time = time_str.split(':')
        h = int(parsed_time[0])
        m = int(parsed_time[1])
        # Dropping the comma turns "SS,mmm" into the decimal digits of
        # SS * 1000 + mmm; this assumes exactly three millisecond digits.
        ms = int(parsed_time[2].replace(',', ''))
        self.time = h * 3600000 + m * 60000 + ms

    def getmilliseconds(self):
        """Return the millisecond field (``time`` modulo 1000)."""
        return self.time % 1000

    def getseconds(self):
        """Return the whole-second field as an ``int``."""
        return (self.time % 60000) // 1000

    def getminutes(self):
        """Return the whole-minute field as an ``int``."""
        return (self.time // 60000) % 60

    def gethours(self):
        """Return the whole-hour field as an ``int``."""
        return self.time // 3600000

    milliseconds = property(getmilliseconds)
    # Misspelled name kept so existing users of the attribute keep working.
    millisecods = property(getmilliseconds)
    seconds = property(getseconds)
    minutes = property(getminutes)
    hours = property(gethours)

    @staticmethod
    def _as_milliseconds(other):
        """Return ``other`` as milliseconds, or ``NotImplemented``."""
        if isinstance(other, TimeStamp):
            return other.time
        if isinstance(other, int):
            return other
        return NotImplemented

    def __iadd__(self, other):
        """Shift this stamp in place by an ``int`` or another ``TimeStamp``.

        Raises:
            TypeError: ``other`` is neither an ``int`` nor a ``TimeStamp``.
        """
        delta = self._as_milliseconds(other)
        if delta is NotImplemented:
            raise TypeError('cannot add {} to TimeStamp'
                            .format(type(other).__name__))
        self.time += delta
        return self

    def __neg__(self):
        """Return a new stamp with the opposite sign; ``self`` is unchanged."""
        new = copy.copy(self)
        new.time = -self.time
        return new

    def __isub__(self, other):
        """Shift this stamp in place by the negation of ``other``."""
        return self.__iadd__(-other)

    def __lt__(self, other):
        value = self._as_milliseconds(other)
        return NotImplemented if value is NotImplemented else self.time < value

    def __le__(self, other):
        value = self._as_milliseconds(other)
        return NotImplemented if value is NotImplemented else self.time <= value

    def __eq__(self, other):
        value = self._as_milliseconds(other)
        return NotImplemented if value is NotImplemented else self.time == value

    def __gt__(self, other):
        value = self._as_milliseconds(other)
        return NotImplemented if value is NotImplemented else self.time > value

    def __ge__(self, other):
        value = self._as_milliseconds(other)
        return NotImplemented if value is NotImplemented else self.time >= value

    def __repr__(self):
        """Format the stamp as ``HH:MM:SS,mmm``."""
        return '%02d:%02d:%02d,%03d' % \
            (self.hours, self.minutes, self.seconds, self.milliseconds)
|
||||||
m = (self.time / 60000) % 60
|
|
||||||
h = self.time / 3600000
|
|
||||||
return '%02d:%02d:%02d,%03d' % (h, m, s, ms)
|
|
||||||
|
|
||||||
|
|
||||||
class Subtitle:
|
class Subtitle:
|
||||||
|
# Parse a single subtitle
|
||||||
def __init__(self, lines, file_name, line_number):
|
def __init__(self, lines, file_name, line_number):
    """Parse a single subtitle block.

    Args:
        lines: The block, either as one string or as a list of its lines.
            The first line is the subtitle number, the second the time
            span (``start --> end``); the remaining lines become
            ``self.content``.
        file_name: File name quoted in error messages.
        line_number: Line number of the block's first line, quoted in
            error messages.

    Any malformed block ends the program through ``panic`` with status 1.
    """
    # Work on a private list so a list passed in by the caller is not
    # consumed by the pop() calls below.
    lines = lines.splitlines() if isinstance(lines, str) else list(lines)

    try:
        # This is mostly ignored, as the subtitles are renumbered later
        self.number = int(lines.pop(0))
    except Exception:
        panic('Invalid line number detected ({}:{})'
              .format(file_name, line_number), 1)

    line_number += 1

    try:
        time_span = lines.pop(0).split(' --> ')
    except Exception:
        panic('Invalid time span format detected ({}:{})'
              .format(file_name, line_number), 1)

    # Without the ' --> ' separator there is no end stamp at all, which is a
    # malformed span rather than a malformed stamp.
    if len(time_span) < 2:
        panic('Invalid time span format detected ({}:{})'
              .format(file_name, line_number), 1)

    try:
        self.time_start = TimeStamp(time_span[0])
        self.time_end = TimeStamp(time_span[1])
    except Exception:
        panic('Invalid time stamp detected ({}:{})'
              .format(file_name, line_number), 1)

    if self.time_start >= self.time_end:
        panic('End time must be greater than start time ({}:{})'
              .format(file_name, line_number), 1)

    self.content = lines
|
||||||
|
|
||||||
|
def __len__(self):
    """Return the number of text lines in ``self.content`` plus two."""
    # The two extra lines are the ones the constructor removes from the
    # block before storing the rest as content.
    header_lines = 2
    return header_lines + len(self.content)
|
||||||
|
|
||||||
def shift(self, ms):
|
def shift(self, ms):
    """Move both ends of the subtitle's time span by ``ms``."""
    offset = ms
    # The two updates are independent of each other.
    self.time_end += offset
    self.time_start += offset
|
||||||
|
|
||||||
|
def replace(self, pattern, new_content):
    """Replace every match of ``pattern`` in each content line.

    Args:
        pattern: Compiled regular expression.
        new_content: Replacement, as accepted by ``pattern.sub``.
    """
    # Compiled patterns expose ``sub`` (they have no ``replace`` method), and
    # the result has to be stored back: rebinding a loop variable would
    # leave ``self.content`` untouched.
    self.content = [pattern.sub(new_content, line) for line in self.content]
|
||||||
|
|
||||||
def matches(self, regexp):
|
def matches(self, regexp):
|
||||||
for line in self.content:
|
for line in self.content:
|
||||||
|
@ -99,31 +157,46 @@ class Subtitle:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def clean(subs, expressions):
|
class ConfigFile:
    """Regular expressions loaded from the configuration file.

    Attributes:
        expressions: Compiled patterns, one per non-blank line of the file;
            an empty list when ``args.clean`` is false.
        file_path: ``Path`` of the file that was read. Only set when
            ``args.clean`` is true.
    """

    def __init__(self, args):
        """Load the expressions selected by the command-line arguments.

        Args:
            args: Object with a ``clean`` flag and a ``config_file``
                attribute (an open text file with a ``name``, or a false
                value to use the default location).
        """
        # No reason to continue
        if not args.clean:
            if args.config_file:
                args.config_file.close()
            self.expressions = []
            return

        file = args.config_file
        if file:
            self.file_path = Path(file.name)
        else:
            # Set default config file if not specified
            self.file_path = self._default_path()
            try:
                # touch() needs the directory to exist, so create it first.
                self.file_path.parent.mkdir(parents=True, exist_ok=True)
                self.file_path.touch()
                file = self.file_path.open(mode='r')
            except PermissionError:
                panic('Can\'t access file {}: Permission denied'
                      .format(self.file_path), 1)

        # Read expressions; the handle is closed even if reading fails.
        with file:
            lines = file.read().strip().splitlines()

        # A blank line would compile to a pattern that matches any text, so
        # blank lines are skipped.
        self.expressions = [re.compile(line) for line in lines if line.strip()]

    @staticmethod
    def _default_path():
        """Return the default location of the configuration file."""
        home = Path.home()
        if isinstance(home, PosixPath):
            return home / '.config' / 'fsubrc'
        # Fall back to the home directory when APPDATA is not defined.
        return Path(os.getenv('APPDATA') or home) / 'fsubrc'
|
||||||
|
|
||||||
|
|
||||||
def shift(subs, ms):
|
class SubripFile:
|
||||||
for sub in subs:
|
def read_file(file):
|
||||||
sub.shift(ms)
|
# Check extension
|
||||||
return list(filter(lambda sub: sub.time_start.time >= 0, subs))
|
if file.name[-4:] != '.srt':
|
||||||
|
panic('File {} is not a SubRip file'.format(file.name), 1)
|
||||||
|
|
||||||
|
|
||||||
def strip_html(subs):
|
|
||||||
for sub in subs:
|
|
||||||
for i in range(0, len(sub.content)):
|
|
||||||
sub.content[i] = re.sub('<.+?>', '', sub.content[i])
|
|
||||||
|
|
||||||
|
|
||||||
def process_file(args, file, expressions):
|
|
||||||
# Read the input file
|
# Read the input file
|
||||||
contents = file.read()
|
contents = file.read()
|
||||||
file.close()
|
file.close()
|
||||||
|
@ -131,82 +204,79 @@ def process_file(args, file, expressions):
|
||||||
# Decode the file contents
|
# Decode the file contents
|
||||||
encoding = chardet.detect(contents)['encoding']
|
encoding = chardet.detect(contents)['encoding']
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
print('Corrupt or empty file ({})'.format(file.name),
|
panic('Corrupt or empty file ({})'.format(file.name), 1)
|
||||||
file=sys.stderr)
|
return contents.decode(encoding)
|
||||||
sys.exit(1)
|
|
||||||
contents = contents.decode(encoding)
|
# This method parses the file
|
||||||
|
def __init__(self, file):
    """Read ``file`` and parse it into ``Subtitle`` objects.

    Args:
        file: Open file object handed to ``SubripFile.read_file``; its
            ``name`` is stored as ``self.file_name``.

    The parsed subtitles are stored in ``self.subs``.
    """
    self.file_name = file.name
    contents = SubripFile.read_file(file)

    # Count empty lines at the beginning
    line_number = 1
    for line in contents.splitlines():
        if line and not line.isspace():
            break
        line_number += 1

    # Split subtitles on runs of empty lines. The capturing group keeps each
    # separator in the result (chunks at even indexes, separators at odd
    # ones), so the line counter can account for every blank line skipped.
    parts = re.split(r'((?:\r?\n){2,})', contents.strip())

    # Create Subtitle objects
    self.subs = []
    for index in range(0, len(parts), 2):
        sub = Subtitle(parts[index], self.file_name, line_number)
        self.subs.append(sub)
        if index + 1 < len(parts):
            # The first newline of the separator ends the chunk's last
            # line; every further newline is one blank line.
            line_number += len(sub) + parts[index + 1].count('\n') - 1
|
||||||
|
|
||||||
# Clean if --clean is passed
|
def clean(self, expressions):
    """Drop every subtitle matched by at least one expression.

    Args:
        expressions: Compiled patterns; each is passed to ``sub.matches``.
            Nothing happens when the collection is empty.
    """
    if not expressions:
        return

    # Each subtitle is tested against all expressions. Rebuilding the filter
    # from ``self.subs`` once per expression would keep only the last one.
    self.subs = [
        sub for sub in self.subs
        if not any(sub.matches(regexp) for regexp in expressions)
    ]
|
||||||
if args.no_html:
|
|
||||||
strip_html(subs_objs)
|
|
||||||
|
|
||||||
# Fix numbering
|
def shift(self, ms):
    """Shift all subtitles and drop those that start before zero.

    Args:
        ms: Offset passed to each subtitle's ``shift`` method.
    """
    for sub in self.subs:
        sub.shift(ms)

    # ``time_start`` is a TimeStamp whose comparison operators read
    # ``other.time``, so the raw millisecond value is compared with 0.
    self.subs = [sub for sub in self.subs if sub.time_start.time >= 0]
|
||||||
|
|
||||||
|
def strip_html(self):
    """Ask every subtitle to replace matches of ``<.+?>`` with nothing."""
    tag_pattern = re.compile('<.+?>')
    replacement = ''
    for subtitle in self.subs:
        subtitle.replace(tag_pattern, replacement)
|
||||||
|
|
||||||
|
def renumber(self):
    """Number the subtitles consecutively, starting at 1, in list order."""
    for position, subtitle in enumerate(self.subs, start=1):
        subtitle.number = position
|
||||||
|
|
||||||
# Join Subtitle objects back to a string
|
def process(self, args, config):
    """Apply the requested operations, renumber, and write the file.

    Args:
        args: Object with ``clean``, ``shift`` and ``no_html`` attributes.
        config: Object whose ``expressions`` are used when cleaning.
    """
    # Optional steps, in fixed order: clean, shift, strip HTML.
    if args.clean:
        patterns = config.expressions
        self.clean(patterns)

    if args.shift:
        offset = args.shift
        self.shift(offset)

    if args.no_html:
        self.strip_html()

    # These two steps always run.
    self.renumber()
    self.write_file()
|
||||||
|
|
||||||
|
def write_file(self):
    """Write ``repr(self)`` plus a final newline to ``self.file_name``.

    The file is opened for writing as UTF-8, replacing previous contents.
    """
    # The context manager closes the handle even when a write fails.
    with open(self.file_name, 'w', encoding='utf-8') as output:
        output.write(repr(self))
        output.write('\n')
|
||||||
|
|
||||||
|
def __repr__(self):
    """Return the ``repr`` of every subtitle, joined by a blank line."""
    rendered = [repr(subtitle) for subtitle in self.subs]
    separator = '\n\n'
    return separator.join(rendered)
|
||||||
if args.clean:
|
|
||||||
cfg = args.config_file
|
|
||||||
|
|
||||||
# Open default config file if not specified
|
|
||||||
if not args.config_file:
|
|
||||||
home = pathlib.Path.home()
|
|
||||||
try:
|
|
||||||
if type(home) is pathlib.PosixPath:
|
|
||||||
cfg = open(str(home) + '/.config/fsubrc', 'r')
|
|
||||||
elif type(home) is pathlib.WindowsPath:
|
|
||||||
cfg = open(os.getenv('APPDATA') + r'\fsubrc', 'r')
|
|
||||||
else:
|
|
||||||
print('Unsupported operating system', file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
except FileNotFoundError:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Read expressions
|
|
||||||
lines = re.split(r'\r?\n', cfg.read().strip())
|
|
||||||
expressions = list(map(re.compile, lines))
|
|
||||||
cfg.close()
|
|
||||||
return expressions
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -264,20 +334,18 @@ def main():
|
||||||
|
|
||||||
# Validate options
|
# Validate options
|
||||||
if not args.clean and args.config_file:
|
if not args.clean and args.config_file:
|
||||||
print('-f requires -c', file=sys.stderr)
|
panic('-f requires -c', 1)
|
||||||
exit(1)
|
|
||||||
|
|
||||||
# Check if all files are .srt
|
config = ConfigFile(args)
|
||||||
|
|
||||||
|
parsed_files = []
|
||||||
for file in args.files:
|
for file in args.files:
|
||||||
if file.name[-4:] != '.srt':
|
parsed_files.append(SubripFile(file))
|
||||||
print('File {} is not a SubRip file'.format(file.name),
|
|
||||||
file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
expressions = read_expressions(args)
|
# TODO: join, split files
|
||||||
|
|
||||||
for file in args.files:
|
for file in parsed_files:
|
||||||
process_file(args, file, expressions)
|
file.process(args, config)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue