Implement new flags

2021-11-17 20:45:38 -03:00 · 2021-11-17 20:45:38 -03:00 · d9c25e1b27
commit d9c25e1b27
parent e51110847e
11 changed files with 282 additions and 75 deletions
--- a/README.md
+++ b/README.md
@ -9,8 +9,6 @@ pip install fsub
 # Usage
 ```
 usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...]
 Fix, edit and clean SubRip (.srt) files.
 positional arguments:
@ -18,14 +16,30 @@ positional arguments:
 optional arguments:
  -h, --help         show this help message and exit
  -f F, --config F   use F as the config file (by default, on Unix it is:
                     "$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc")
  -r, --replace      edit files in-place (--join will delete joined files too), instead of the
                     default behavior of outputting results into files prefixed with "out-"
  -p, --stdout       dump results to stdout, and do not edit nor write any file
 processing:
  Flags that specify an action to be taken. Many may be specified.
  -c, --clean        remove subtitles matching regular expressions listed in the config file
                     (this is the default behavior if no other flag is passed)
  -s MS, --shift MS  shift all subtitles by MS milliseconds, which may be positive or negative
  -n, --no-html      strip HTML tags from subtitles content
  -f F, --config F   use F as the config file (by default, F is: on Unix:
                     $HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc)
  -j, --join         join all files into the first, shifting their time accordingly
-  -r, --replace      edit files in-place (-j will delete joined files too)
+  -u, --cut-out      cut the specified section from the file(s) into new files
 sectioning:
  Flags that specify a section to work in. They accept either a subtitle number or a time
  stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours,
  minutes, seconds are 2-zero padded while milliseconds is 3-zero padded). fsub will not
  modify subtitles outside this range, except while joining the files.
  -b B, --begin B    specify section beginning (inclusive)
  -e E, --end E      specify section end (inclusive)
 ```
 # Testing
--- a/setup.cfg
+++ b/setup.cfg
@ -1,6 +1,6 @@
 [metadata]
 name = fsub
-version = 0.1.3
+version = 1.0.0
 author = Augusto Lenz Gunsch
 author_email = augustogunsch@tutanota.com
 description = CLI SubRip editor
--- a/src/fsub/fsub.py
+++ b/src/fsub/fsub.py
@ -96,6 +96,30 @@ class TimeStamp:
         (self.hours, self.minutes, self.seconds, self.millisecods)
 class SectionMarker:
    """Boundary of the section selected with --begin / --end.

    The wrapped ``marker`` is a TimeStamp when the argument parses as one,
    otherwise an integer subtitle number.
    """

    def __init__(self, arg):
        """Parse arg as a time stamp, falling back to a subtitle number.

        Calls panic() (presumably terminating with status 1 -- confirm
        against panic's definition) when arg is neither.
        """
        try:
            self.marker = TimeStamp(arg)
        except Exception:
            # NOTE(review): what TimeStamp raises on bad input is not visible
            # from here, so the broad catch is kept for this first attempt.
            try:
                self.marker = int(arg)
            except (TypeError, ValueError):
                panic('Invalid section marker argument', 1)

    def include_after(self, other):
        """Return whether subtitle ``other`` starts at or after this marker.

        Compares ``other.time_start`` for a time stamp marker and
        ``other.number`` for a subtitle-number marker.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_start >= self.marker
        return other.number >= self.marker

    def include_before(self, other):
        """Return whether subtitle ``other`` ends at or before this marker.

        Compares ``other.time_end`` for a time stamp marker and
        ``other.number`` for a subtitle-number marker.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_end <= self.marker
        return other.number <= self.marker

    def __le__(self, other):
        """Return whether this marker is not greater than ``other``.

        ``other`` may be another SectionMarker or a bare value that the
        wrapped marker can be compared with.
        """
        # The previous int(self) <= int(other) always raised TypeError,
        # because this class defines no __int__; compare the wrapped
        # markers directly instead.
        if isinstance(other, SectionMarker):
            other = other.marker
        return self.marker <= other
 class Subtitle:
    # Parse a single subtitle
    def __init__(self, lines, file_name, line_number):
@ -235,18 +259,10 @@ class SubripFile:
        self.subs += other.subs
        return self
    def clean(self, expressions):
        """Drop every subtitle that matches at least one of expressions.

        An empty expressions sequence leaves self.subs untouched.
        """
        if len(expressions) > 0:
            self.subs = [
                sub for sub in self.subs
                if not any(sub.matches(regexp) for regexp in expressions)
            ]
    def shift(self, ms):
        # Shift every subtitle by ms (handed to each subtitle's own shift),
        # then discard the subtitles whose start time ended up negative.
        for sub in self.subs:
            sub.shift(ms)
-        self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
+        self.subs = [sub for sub in self.subs if sub.time_start >= 0]
    def strip_html(self):
        p = re.compile('<.+?>')
@ -260,26 +276,54 @@ class SubripFile:
            i += 1
    def process(self, args, config):
        # Applies the requested edits one subtitle at a time, then renumbers
        # the subtitles and writes the result.
-        if args.clean:
+        html_regex = re.compile('<.+?>')
-            self.clean(config.expressions)
+        new_subs = []
        for sub in self.subs:
            # Subtitles outside the --begin/--end section are kept unmodified.
            if args.begin and not args.begin.include_after(sub):
                new_subs.append(sub)
                continue
-        if args.shift:
+            if args.end and not args.end.include_before(sub):
-            self.shift(args.shift)
+                new_subs.append(sub)
                continue
-        if args.no_html:
+            if args.clean and len(config.expressions) > 0:
-            self.strip_html()
+                if any(sub.matches(regex) for regex in config.expressions):
                    continue
            if args.shift:
                sub.shift(args.shift)
                # A subtitle shifted to before time zero is dropped.
                if sub.time_start < 0:
                    continue
            if args.no_html:
                sub.replace(html_regex, '')
            new_subs.append(sub)
        self.subs = new_subs
        self.renumber()
        self.write_file(args.replace)
    def write_file(self, in_place=False, stdout=False):
        self.renumber()
        if stdout:
            print(self)
            return
-        file = self.file_name if in_place else 'out-' + self.file_name
+        try:
            if in_place:
                path = self.file_name
                output = open(path, 'w', encoding='utf-8')
            else:
                path = Path(self.file_name)
                path = path.with_name('out-' + path.name)
                output = path.open(mode='w', encoding='utf-8')
        except PermissionError:
            panic('Can\'t access file {}: Permission denied'
                  .format(path), 1)
        output = open(file, 'w', encoding='utf-8')
        output.write(repr(self))
        if len(self.subs) > 0:
@ -291,6 +335,12 @@ class SubripFile:
        os.remove(self.file_name)
        del self
    def trunc_before(self, marker):
        """Keep only the subtitles that marker.include_after() accepts."""
        self.subs = list(filter(marker.include_after, self.subs))
    def trunc_after(self, marker):
        """Keep only the subtitles that marker.include_before() accepts."""
        self.subs = list(filter(marker.include_before, self.subs))
    def __repr__(self):
        """Render every subtitle with repr(), separated by a blank line."""
        rendered = [repr(sub) for sub in self.subs]
        return '\n\n'.join(rendered)
@ -302,29 +352,6 @@ def parse_args(args):
        add_help=True
    )
    parser.add_argument(
        '-c', '--clean',
        help='remove subtitles matching regular expressions ' +
             'listed in the config file (this is the default ' +
             'behavior if no other flag is passed)',
        action='store_true'
    )
    parser.add_argument(
        '-s', '--shift',
        help='shift all subtitles by MS milliseconds, which ' +
             'may be positive or negative',
        metavar='MS',
        action='store',
        type=int
    )
    parser.add_argument(
        '-n', '--no-html',
        help='strip HTML tags from subtitles content',
        action='store_true'
    )
    # Requires --clean
    parser.add_argument(
        '-f', '--config',
@ -335,18 +362,45 @@ def parse_args(args):
        type=argparse.FileType('r')
    )
-    parser.add_argument(
+    processing = parser.add_argument_group(
        'processing',
        'Flags that specify an action to be taken. Many may ' +
        'be specified.'
    )
    processing.add_argument(
        '-c', '--clean',
        help='remove subtitles matching regular expressions ' +
             'listed in the config file (this is the default ' +
             'behavior if no other flag is passed)',
        action='store_true'
    )
    processing.add_argument(
        '-s', '--shift',
        help='shift all subtitles by MS milliseconds, which ' +
             'may be positive or negative',
        metavar='MS',
        action='store',
        type=int
    )
    processing.add_argument(
        '-n', '--no-html',
        help='strip HTML tags from subtitles content',
        action='store_true'
    )
    processing.add_argument(
        '-j', '--join',
        help='join all files into the first, shifting their time accordingly',
        action='store_true'
    )
-    # Requires --begin
+    # Requires --begin or --end, may have both
-    parser.add_argument(
+    processing.add_argument(
        '-u', '--cut-out',
-        help='cut out the specified section from the file(s), creating ' +
+        help='cut the specified section from the file(s) into new files',
             'for every input file a new one prefixed with "cut-" ' +
             '(--join will join both the input files and the cutouts)',
        action='store_true'
    )
@ -385,29 +439,39 @@ def parse_args(args):
    section.add_argument(
        '-b', '--begin',
-        help='specify section beginning (by default, beginning of file)',
+        help='specify section beginning (inclusive)',
        metavar='B',
        action='store'
    )
    section.add_argument(
        '-e', '--end',
-        help='specify section end (by default, end of file)',
+        help='specify section end (inclusive)',
        metavar='E',
        action='store'
    )
    args = parser.parse_args(args)
    # Flags that require section
    if args.cut_out:
        if not args.begin and not args.end:
            panic('You must specify a section to work with', 1)
    # Make sure --clean is the default
-    # TODO: account for new options
+    if not any((args.shift, args.no_html, args.join, args.cut_out)):
    if not args.shift and not args.no_html:
        args.clean = True
    # Validate options
    if not args.clean and args.config:
        panic('-f requires -c', 1)
    if args.begin:
        args.begin = SectionMarker(args.begin)
    if args.end:
        args.end = SectionMarker(args.end)
    return args
@ -419,6 +483,15 @@ def run(args):
    for file in args.files:
        parsed_files.append(SubripFile(file))
    if args.cut_out:
        if args.begin:
            for file in parsed_files:
                file.trunc_before(args.begin)
        if args.end:
            for file in parsed_files:
                file.trunc_after(args.end)
    if args.join:
        first = parsed_files.pop(0)
        while True:
@ -429,7 +502,6 @@ def run(args):
            except IndexError:
                break
        parsed_files.append(first)
        first.renumber()
    for file in parsed_files:
        file.process(args, config)
--- a/tests/integration.py
+++ b/tests/integration.py
@ -9,39 +9,39 @@ from pathlib import Path
 class TestFsub(unittest.TestCase):
    samples = Path('tests/samples')
-    def run_on(self, args, samples, ofiles, replace=False):
+    def run_on(self, args, samples, expect_out_files, replace=False):
        # Copies each sample to "<index>.<calling test name>.srt" in the
        # working directory, runs fsub on the copies (their names are
        # appended to args) and compares the produced files with the
        # expected ones, removing the files afterwards.
        caller = inspect.stack()[1][3]
-        ifiles = []
+        cloned_samples = []
        samples = map(lambda s: str(self.samples / s) + '.srt', samples)
        i = 1
        for sample in samples:
-            ifile = str(i) + '.' + caller + '.srt'
+            cloned_sample = str(i) + '.' + caller + '.srt'
-            shutil.copy(sample, ifile)
+            shutil.copy(sample, cloned_sample)
-            args.append(ifile)
+            args.append(cloned_sample)
-            ifiles.append(ifile)
+            cloned_samples.append(cloned_sample)
            i += 1
        fsub.run(args)
-        limit = len(ofiles)
+        limit = len(expect_out_files)
-        for i, ifile in enumerate(ifiles):
+        for i, cloned_sample in enumerate(cloned_samples):
            # Only the first `limit` copies have an expected output to check.
            if i < limit:
                # Without in-place editing the result is in "out-<copy>".
                if not replace:
-                    os.remove(ifile)
+                    os.remove(cloned_sample)
-                    ifile = 'out-' + ifile
+                    cloned_sample = 'out-' + cloned_sample
-                out = open(ifile)
+                out = open(cloned_sample)
                result = out.read()
                out.close()
-                ofile = str(self.samples / ofiles[i]) + '.srt'
+                expect_out_file = str(self.samples/expect_out_files[i])+'.srt'
-                cmp_file = open(ofile)
+                expect_out_file = open(expect_out_file)
-                cmp = cmp_file.read()
+                expect_out = expect_out_file.read()
-                cmp_file.close()
+                expect_out_file.close()
-                self.assertEqual(result, cmp)
+                self.assertEqual(result, expect_out)
            # Clean up; a file that no longer exists is not an error.
            try:
-                os.remove(ifile)
+                os.remove(cloned_sample)
            except FileNotFoundError:
                pass
@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase):
        args = ['-f', str(self.samples / 'blacklist')]
        self.run_on(args, ['sample1'], ['sample1-cleaned'])
    def test_cleaned_begin(self):
        # Clean with the sample blacklist, with the section beginning at
        # subtitle number 3.
        blacklist = str(self.samples / 'blacklist')
        self.run_on(['-f', blacklist, '-b', '3'],
                    ['sample1'],
                    ['sample1-cleaned-begin'])
    def test_stripped(self):
        # -n on its own, with no section restriction.
        args = ['-n']
        expected = ['sample1-stripped']
        self.run_on(args, ['sample1'], expected)
    def test_stripped_end(self):
        # Strip HTML with the section ending at time stamp 00:00:55,500.
        self.run_on(['-n', '-e', '00:00:55,500'],
                    ['sample1'],
                    ['sample1-stripped-end'])
    def test_cleaned_stripped(self):
        # Explicit --clean (with the sample blacklist) combined with -n.
        blacklist = str(self.samples / 'blacklist')
        self.run_on(['-c', '-f', blacklist, '-n'],
                    ['sample1'],
                    ['sample1-cleaned-stripped'])
@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase):
        args = ['-s', '-52000']
        self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s'])
    def test_shifted_minus_1h_begin(self):
        # Shift by -3600000 ms with the section beginning at 00:00:53,500.
        self.run_on(['-s', '-3600000', '-b', '00:00:53,500'],
                    ['sample1'],
                    ['sample1-shifted-minus-1h-begin'])
    def test_joined(self):
        # Join three samples; only one expected output file is compared.
        inputs = ['sample1', 'sample2', 'sample3']
        expected = ['sample1-sample2-sample3-joined']
        self.run_on(['-j'], inputs, expected)
    def test_cut_begin(self):
        # Cut out the section beginning at subtitle number 2.
        self.run_on(['-b', '2', '-u'],
                    ['sample1'],
                    ['sample1-cut-out-begin'])
    def test_cut_end(self):
        # Cut out the section ending at subtitle number 1.
        self.run_on(['-e', '1', '-u'],
                    ['sample1'],
                    ['sample1-cut-out-end'])
    def test_cut_begin_end(self):
        # Cut out the section from subtitle number 2 to subtitle number 4.
        self.run_on(['-b', '2', '-e', '4', '-u'],
                    ['sample1'],
                    ['sample1-cut-out-begin-end'])
    def test_cut_end_joined(self):
        # Cut-out with an end marker of 1, combined with --join on two files.
        inputs = ['sample1', 'sample3']
        expected = ['sample1-sample3-cut-out-end-joined']
        self.run_on(['-e', '1', '-u', '-j'], inputs, expected)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/samples/sample1-cleaned-begin.srt
+++ b/tests/samples/sample1-cleaned-begin.srt
@ -0,0 +1,17 @@
 1
 00:00:48,900 --> 00:00:49,800
 <b>This one is full of HTML tags.</b>
 <i>Above, below, everywhere</i>
 2
 00:00:51,800 --> 00:00:52,700
 <a href='dummy'>Even <a>'s!</a>
 3
 00:00:56,000 --> 00:00:57,000
 <p>It should just strip all of
 them mercilessly</p>
 4
 00:00:58,100 --> 00:00:59,600
 <ul>Including this one!</ul>
--- a/tests/samples/sample1-cut-out-begin-end.srt
+++ b/tests/samples/sample1-cut-out-begin-end.srt
@ -0,0 +1,14 @@
 1
 00:00:51,800 --> 00:00:52,700
 <a href='dummy'>Even <a>'s!</a>
 2
 00:00:53,500 --> 00:00:55,200
 <html>The script should not
 care whether the tag is
 valid or not</html>
 3
 00:00:56,000 --> 00:00:57,000
 <p>It should just strip all of
 them mercilessly</p>
--- a/tests/samples/sample1-cut-out-begin.srt
+++ b/tests/samples/sample1-cut-out-begin.srt
@ -0,0 +1,18 @@
 1
 00:00:51,800 --> 00:00:52,700
 <a href='dummy'>Even <a>'s!</a>
 2
 00:00:53,500 --> 00:00:55,200
 <html>The script should not
 care whether the tag is
 valid or not</html>
 3
 00:00:56,000 --> 00:00:57,000
 <p>It should just strip all of
 them mercilessly</p>
 4
 00:00:58,100 --> 00:00:59,600
 <ul>Including this one!</ul>
--- a/tests/samples/sample1-cut-out-end.srt
+++ b/tests/samples/sample1-cut-out-end.srt
@ -0,0 +1,4 @@
 1
 00:00:48,900 --> 00:00:49,800
 <b>This one is full of HTML tags.</b>
 <i>Above, below, everywhere</i>
--- a/tests/samples/sample1-sample3-cut-out-end-joined.srt
+++ b/tests/samples/sample1-sample3-cut-out-end-joined.srt
@ -0,0 +1,8 @@
 1
 00:00:48,900 --> 00:00:49,800
 <b>This one is full of HTML tags.</b>
 <i>Above, below, everywhere</i>
 2
 10:03:49,800 --> 10:05:02,000
 This one has even more whitespace!
--- a/tests/samples/sample1-shifted-minus-1h-begin.srt
+++ b/tests/samples/sample1-shifted-minus-1h-begin.srt
@ -0,0 +1,8 @@
 1
 00:00:48,900 --> 00:00:49,800
 <b>This one is full of HTML tags.</b>
 <i>Above, below, everywhere</i>
 2
 00:00:51,800 --> 00:00:52,700
 <a href='dummy'>Even <a>'s!</a>
--- a/tests/samples/sample1-stripped-end.srt
+++ b/tests/samples/sample1-stripped-end.srt
@ -0,0 +1,23 @@
 1
 00:00:48,900 --> 00:00:49,800
 This one is full of HTML tags.
 Above, below, everywhere
 2
 00:00:51,800 --> 00:00:52,700
 Even 's!
 3
 00:00:53,500 --> 00:00:55,200
 The script should not
 care whether the tag is
 valid or not
 4
 00:00:56,000 --> 00:00:57,000
 <p>It should just strip all of
 them mercilessly</p>
 5
 00:00:58,100 --> 00:00:59,600
 <ul>Including this one!</ul>