Improve code quality

2021-11-15 11:44:50 -03:00
parent 84192b0604
commit 77671d960f
1 changed files with 214 additions and 146 deletions
--- a/src/fsub/fsub.py
+++ b/src/fsub/fsub.py
@@ -26,64 +26,122 @@ import argparse
 import re
 import chardet
 import os
-import pathlib
+import copy
 from pathlib import Path, PosixPath
-class Time:
+def panic(message, code):
-    def __init__(self, time_str, file_name, line_number):
+    print(message, file=sys.stderr)
    sys.exit(code)
 class TimeStamp:
    def __init__(self, time_str):
        parsed_time = time_str.split(':')
        try:
        h = int(parsed_time[0])
        m = int(parsed_time[1])
        ms = int(parsed_time[2].replace(',', ''))
            # self.time: time in milliseconds
        self.time = h * 3600000 + m * 60000 + ms
        except Exception:
            print('Invalid time format detected ({}:{})'
                  .format(file_name, line_number),
                  file=sys.stderr)
            sys.exit(1)
-    def add(self, ms):
+    def getmilliseconds(self):
-        self.time += ms
+        return self.time % 1000
    def getseconds(self):
        return (self.time % 60000) / 1000
    def getminutes(self):
        return (self.time / 60000) % 60
    def gethours(self):
        return self.time / 3600000
    millisecods = property(getmilliseconds)
    seconds = property(getseconds)
    minutes = property(getminutes)
    hours = property(gethours)
    def __iadd__(self, other):
        t = type(other)
        if t is int:
            self.time += other
        elif t is type(self):
            self.time += other.time
        else:
            raise TypeError
        return self
    def __neg__(self):
        new = copy.deepcopy(self)
        new.time = -new.time
        return new
    def __isub__(self, other):
        return self.__iadd__(-other)
    def __lt__(self, other):
        return self.time < other.time
    def __le__(self, other):
        return self.time <= other.time
    def __eq__(self, other):
        return self.time == other.time
    def __gt__(self, other):
        return self.time > other.time
    def __ge__(self, other):
        return self.time >= other.time
    def __repr__(self):
-        ms = self.time % 1000
+        return '%02d:%02d:%02d,%03d' % \
-        s = (self.time % 60000) / 1000
+         (self.hours, self.minutes, self.seconds, self.millisecods)
        m = (self.time / 60000) % 60
        h = self.time / 3600000
        return '%02d:%02d:%02d,%03d' % (h, m, s, ms)
 class Subtitle:
    # Parse a single subtitle
    def __init__(self, lines, file_name, line_number):
        if type(lines) is str:
            lines = lines.splitlines()
        try:
            # This is mostly ignored, as the subtitles are renumbered later
            self.number = int(lines.pop(0))
        except Exception:
-            print('Invalid line number detected ({}:{})'
+            panic('Invalid line number detected ({}:{})'
-                  .format(file_name, line_number),
+                  .format(file_name, line_number), 1)
                  file=sys.stderr)
            sys.exit(1)
        line_number += 1
        try:
            time_span = lines.pop(0).split(' --> ')
            self.time_start = Time(time_span[0], file_name, line_number)
            self.time_end = Time(time_span[1], file_name, line_number)
        except Exception:
-            print('Invalid time span format detected ({}:{})'
+            panic('Invalid time span format detected ({}:{})'
-                  .format(file_name, line_number),
+                  .format(file_name, line_number), 1)
-                  file=sys.stderr)
+
-            sys.exit(1)
+        try:
            self.time_start = TimeStamp(time_span[0])
            self.time_end = TimeStamp(time_span[1])
        except Exception:
            panic('Invalid time stamp detected ({}:{})'
                  .format(file_name, line_number), 1)
        if self.time_start >= self.time_end:
            panic('End time must be greater than start time ({}:{})'
                  .format(file_name, line_number), 1)
        self.content = lines
    def __len__(self):
        return len(self.content) + 2
    def shift(self, ms):
-        self.time_start.add(ms)
+        self.time_start += ms
-        self.time_end.add(ms)
+        self.time_end += ms
    def replace(self, pattern, new_content):
        for line in self.content:
            line = pattern.replace(new_content, line)
    def matches(self, regexp):
        for line in self.content:
@@ -99,31 +157,46 @@ class Subtitle:
        )
-def clean(subs, expressions):
+class ConfigFile:
-    # Cancel if no expression
+    def __init__(self, args):
-    if len(expressions) == 0:
+        # No reason to continue
-        return subs
+        if not args.clean:
            if args.config_file:
                args.config_file.close()
            self.expressions = []
            return
-    # Remove lines matching any expression
+        file = args.config_file
-    for regexp in expressions:
+        # Set default config file if not specified
-        subs = filter(lambda sub: not sub.matches(regexp), subs)
+        if not file:
            home = Path.home()
-    return list(subs)
+            if type(home) is PosixPath:
                self.file_path = home / '.config' / 'fsubrc'
            else:
                self.file_path = Path(os.getenv('APPDATA')) / 'fsubrc'
            try:
                self.file_path.touch()
                file = self.file_path.open(mode='r')
            except PermissionError:
                panic('Can\'t access file {}: Permission denied'
                      .format(self.file_path), 1)
        else:
            self.file_path = Path(file.name)
        # Read expressions
        lines = file.read().strip().splitlines()
        file.close()
        self.expressions = list(map(re.compile, lines))
-def shift(subs, ms):
+class SubripFile:
-    for sub in subs:
+    def read_file(file):
-        sub.shift(ms)
+        # Check extension
-    return list(filter(lambda sub: sub.time_start.time >= 0, subs))
+        if file.name[-4:] != '.srt':
            panic('File {} is not a SubRip file'.format(file.name), 1)
 def strip_html(subs):
    for sub in subs:
        for i in range(0, len(sub.content)):
            sub.content[i] = re.sub('<.+?>', '', sub.content[i])
 def process_file(args, file, expressions):
        # Read the input file
        contents = file.read()
        file.close()
@@ -131,82 +204,79 @@ def process_file(args, file, expressions):
        # Decode the file contents
        encoding = chardet.detect(contents)['encoding']
        if encoding is None:
-        print('Corrupt or empty file ({})'.format(file.name),
+            panic('Corrupt or empty file ({})'.format(file.name), 1)
-              file=sys.stderr)
+        return contents.decode(encoding)
-        sys.exit(1)
+
-    contents = contents.decode(encoding)
+    # This method parses the file
    def __init__(self, file):
        self.file_name = file.name
        contents = SubripFile.read_file(file)
        # Count empty lines at the beginning
    r = re.compile(r'\r?\n')
        line_number = 1
-    for line in r.split(contents):
+        for line in contents.splitlines():
            if len(line) == 0 or line.isspace():
                line_number += 1
            else:
                break
        # Split subtitles on empty lines
-    subs = re.split(r'(?:\r?\n){2}', contents.strip())
+        chunks = re.split(r'(?:\r?\n){2}', contents.strip())
        # Create Subtitle objects
-    subs_objs = []
+        self.subs = []
-    for sub in subs:
+        for lines in chunks:
-        lines = list(r.split(sub))
+            sub = Subtitle(lines, self.file_name, line_number)
-        subs_objs.append(Subtitle(lines, file.name, line_number))
+            self.subs.append(sub)
-        line_number += len(lines) + 3
+            line_number += len(sub) + 1
-    # Clean if --clean is passed
+    def clean(self, expressions):
-    if args.clean:
+        if len(expressions) == 0:
-        subs_objs = clean(subs_objs, expressions)
+            return
-    # Shift if --shift is passed
+        # Remove lines matching any expression
-    if args.shift:
+        for regexp in expressions:
-        subs_objs = shift(subs_objs, args.shift)
+            subs = filter(lambda sub: not sub.matches(regexp), self.subs)
-    # Strip HTML if --no-html is passed
+        self.subs = list(subs)
    if args.no_html:
        strip_html(subs_objs)
-    # Fix numbering
+    def shift(self, ms):
        for sub in self.subs:
            sub.shift(ms)
        self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
    def strip_html(self):
        p = re.compile('<.+?>')
        for sub in self.subs:
            sub.replace(p, '')
    def renumber(self):
        i = 1
-    for sub in subs_objs:
+        for sub in self.subs:
            sub.number = i
            i += 1
-    # Join Subtitle objects back to a string
+    def process(self, args, config):
-    contents = '\n\n'.join(map(repr, subs_objs))
+        if args.clean:
            self.clean(config.expressions)
-    # Write output
+        if args.shift:
-    output = open(file.name, 'w', encoding='utf-8')
+            self.shift(args.shift)
-    output.write(contents)
+
        if args.no_html:
            self.strip_html()
        self.renumber()
        self.write_file()
    def write_file(self):
        output = open(self.file_name, 'w', encoding='utf-8')
        output.write(repr(self))
        output.write('\n')
        output.close()
-
+    def __repr__(self):
-def read_expressions(args):
+        return '\n\n'.join(map(repr, self.subs))
    if args.clean:
        cfg = args.config_file
        # Open default config file if not specified
        if not args.config_file:
            home = pathlib.Path.home()
            try:
                if type(home) is pathlib.PosixPath:
                    cfg = open(str(home) + '/.config/fsubrc', 'r')
                elif type(home) is pathlib.WindowsPath:
                    cfg = open(os.getenv('APPDATA') + r'\fsubrc', 'r')
                else:
                    print('Unsupported operating system', file=sys.stderr)
                    sys.exit(1)
            except FileNotFoundError:
                return []
        # Read expressions
        lines = re.split(r'\r?\n', cfg.read().strip())
        expressions = list(map(re.compile, lines))
        cfg.close()
        return expressions
    return []
 def main():
@@ -264,20 +334,18 @@ def main():
    # Validate options
    if not args.clean and args.config_file:
-        print('-f requires -c', file=sys.stderr)
+        panic('-f requires -c', 1)
        exit(1)
-    # Check if all files are .srt
+    config = ConfigFile(args)
    parsed_files = []
    for file in args.files:
-        if file.name[-4:] != '.srt':
+        parsed_files.append(SubripFile(file))
            print('File {} is not a SubRip file'.format(file.name),
                  file=sys.stderr)
            sys.exit(1)
-    expressions = read_expressions(args)
+    # TODO: join, split files
-    for file in args.files:
+    for file in parsed_files:
-        process_file(args, file, expressions)
+        file.process(args, config)
 if __name__ == '__main__':