Implement new flags

2021-11-17 20:45:38 -03:00
parent e51110847e
commit d9c25e1b27
11 changed files with 282 additions and 75 deletions
--- a/README.md
+++ b/README.md
@@ -9,8 +9,6 @@ pip install fsub

 # Usage
 ```
-usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...]
-
 Fix, edit and clean SubRip (.srt) files.

 positional arguments:
@@ -18,14 +16,30 @@ positional arguments:

 optional arguments:
  -h, --help         show this help message and exit
+  -f F, --config F   use F as the config file (by default, on Unix it is:
+                     "$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc")
+  -r, --replace      edit files in-place (--join will delete joined files too), instead of the
+                     default behavior of outputting results into files prefixed with "out-"
+  -p, --stdout       dump results to stdout, and do not edit nor write any file
+
+processing:
+  Flags that specify an action to be taken. Many may be specified.
+
  -c, --clean        remove subtitles matching regular expressions listed in the config file
                     (this is the default behavior if no other flag is passed)
  -s MS, --shift MS  shift all subtitles by MS milliseconds, which may be positive or negative
  -n, --no-html      strip HTML tags from subtitles content
-  -f F, --config F   use F as the config file (by default, F is: on Unix:
-                     $HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc)
  -j, --join         join all files into the first, shifting their time accordingly
-  -r, --replace      edit files in-place (-j will delete joined files too)
+  -u, --cut-out      cut the specified section from the file(s) into new files
+
+sectioning:
+  Flags that specify a section to work in. They accept either a subtitle number or a time
+  stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours,
+  minutes and seconds are zero-padded to 2 digits and milliseconds to 3 digits). fsub will
+  not modify subtitles outside this range, except while joining the files.
+
+  -b B, --begin B    specify section beginning (inclusive)
+  -e E, --end E      specify section end (inclusive)
 ```

 # Testing
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = fsub
-version = 0.1.3
+version = 1.0.0
 author = Augusto Lenz Gunsch
 author_email = augustogunsch@tutanota.com
 description = CLI SubRip editor
--- a/src/fsub/fsub.py
+++ b/src/fsub/fsub.py
@@ -96,6 +96,30 @@ class TimeStamp:
         (self.hours, self.minutes, self.seconds, self.millisecods)


+class SectionMarker:
+    def __init__(self, arg):
+        try:
+            self.marker = TimeStamp(arg)
+        except Exception:
+            try:
+                self.marker = int(arg)
+            except Exception:
+                panic('Invalid section marker argument', 1)
+
+    def include_after(self, other):
+        if type(self.marker) is TimeStamp:
+            return other.time_start >= self.marker
+        return other.number >= self.marker
+
+    def include_before(self, other):
+        if type(self.marker) is TimeStamp:
+            return other.time_end <= self.marker
+        return other.number <= self.marker
+
+    def __le__(self, other):
+        return int(self) <= int(other)
+
+
 class Subtitle:
    # Parse a single subtitle
    def __init__(self, lines, file_name, line_number):
@@ -235,18 +259,10 @@ class SubripFile:
        self.subs += other.subs
        return self

-    def clean(self, expressions):
-        if len(expressions) == 0:
-            return
-
-        # Remove lines matching any expression
-        for regexp in expressions:
-            self.subs = [sub for sub in self.subs if not sub.matches(regexp)]
-
    def shift(self, ms):
        for sub in self.subs:
            sub.shift(ms)
-        self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
+        self.subs = [sub for sub in self.subs if sub.time_start >= 0]

    def strip_html(self):
        p = re.compile('<.+?>')
@@ -260,26 +276,54 @@ class SubripFile:
            i += 1

    def process(self, args, config):
-        if args.clean:
-            self.clean(config.expressions)
+        html_regex = re.compile('<.+?>')
+        new_subs = []
+        for sub in self.subs:
+            if args.begin and not args.begin.include_after(sub):
+                new_subs.append(sub)
+                continue

-        if args.shift:
-            self.shift(args.shift)
+            if args.end and not args.end.include_before(sub):
+                new_subs.append(sub)
+                continue

-        if args.no_html:
-            self.strip_html()
+            if args.clean and len(config.expressions) > 0:
+                if any(sub.matches(regex) for regex in config.expressions):
+                    continue
+
+            if args.shift:
+                sub.shift(args.shift)
+                if sub.time_start < 0:
+                    continue
+
+            if args.no_html:
+                sub.replace(html_regex, '')
+
+            new_subs.append(sub)
+
+        self.subs = new_subs

-        self.renumber()
        self.write_file(args.replace)

    def write_file(self, in_place=False, stdout=False):
+        self.renumber()
+
        if stdout:
            print(self)
            return

-        file = self.file_name if in_place else 'out-' + self.file_name
+        try:
+            if in_place:
+                path = self.file_name
+                output = open(path, 'w', encoding='utf-8')
+            else:
+                path = Path(self.file_name)
+                path = path.with_name('out-' + path.name)
+                output = path.open(mode='w', encoding='utf-8')
+        except PermissionError:
+            panic('Can\'t access file {}: Permission denied'
+                  .format(path), 1)

-        output = open(file, 'w', encoding='utf-8')
        output.write(repr(self))

        if len(self.subs) > 0:
@@ -291,6 +335,12 @@ class SubripFile:
        os.remove(self.file_name)
        del self

+    def trunc_before(self, marker):
+        self.subs = [sub for sub in self.subs if marker.include_after(sub)]
+
+    def trunc_after(self, marker):
+        self.subs = [sub for sub in self.subs if marker.include_before(sub)]
+
    def __repr__(self):
        return '\n\n'.join(map(repr, self.subs))

@@ -302,29 +352,6 @@ def parse_args(args):
        add_help=True
    )

-    parser.add_argument(
-        '-c', '--clean',
-        help='remove subtitles matching regular expressions ' +
-             'listed in the config file (this is the default ' +
-             'behavior if no other flag is passed)',
-        action='store_true'
-    )
-
-    parser.add_argument(
-        '-s', '--shift',
-        help='shift all subtitles by MS milliseconds, which ' +
-             'may be positive or negative',
-        metavar='MS',
-        action='store',
-        type=int
-    )
-
-    parser.add_argument(
-        '-n', '--no-html',
-        help='strip HTML tags from subtitles content',
-        action='store_true'
-    )
-
    # Requires --clean
    parser.add_argument(
        '-f', '--config',
@@ -335,18 +362,45 @@ def parse_args(args):
        type=argparse.FileType('r')
    )

-    parser.add_argument(
+    processing = parser.add_argument_group(
+        'processing',
+        'Flags that specify an action to be taken. Many may ' +
+        'be specified.'
+    )
+
+    processing.add_argument(
+        '-c', '--clean',
+        help='remove subtitles matching regular expressions ' +
+             'listed in the config file (this is the default ' +
+             'behavior if no other flag is passed)',
+        action='store_true'
+    )
+
+    processing.add_argument(
+        '-s', '--shift',
+        help='shift all subtitles by MS milliseconds, which ' +
+             'may be positive or negative',
+        metavar='MS',
+        action='store',
+        type=int
+    )
+
+    processing.add_argument(
+        '-n', '--no-html',
+        help='strip HTML tags from subtitles content',
+        action='store_true'
+    )
+
+    processing.add_argument(
        '-j', '--join',
        help='join all files into the first, shifting their time accordingly',
        action='store_true'
    )

-    # Requires --begin
-    parser.add_argument(
+    # Requires --begin or --end, may have both
+    processing.add_argument(
        '-u', '--cut-out',
-        help='cut out the specified section from the file(s), creating ' +
-             'for every input file a new one prefixed with "cut-" ' +
-             '(--join will join both the input files and the cutouts)',
+        help='cut the specified section from the file(s) into new files',
        action='store_true'
    )

@@ -385,29 +439,39 @@ def parse_args(args):

    section.add_argument(
        '-b', '--begin',
-        help='specify section beginning (by default, beginning of file)',
+        help='specify section beginning (inclusive)',
        metavar='B',
        action='store'
    )

    section.add_argument(
        '-e', '--end',
-        help='specify section end (by default, end of file)',
+        help='specify section end (inclusive)',
        metavar='E',
        action='store'
    )

    args = parser.parse_args(args)

+    # Flags that require section
+    if args.cut_out:
+        if not args.begin and not args.end:
+            panic('You must specify a section to work with', 1)
+
    # Make sure --clean is the default
-    # TODO: account for new options
-    if not args.shift and not args.no_html:
+    if not any((args.shift, args.no_html, args.join, args.cut_out)):
        args.clean = True

    # Validate options
    if not args.clean and args.config:
        panic('-f requires -c', 1)

+    if args.begin:
+        args.begin = SectionMarker(args.begin)
+
+    if args.end:
+        args.end = SectionMarker(args.end)
+
    return args


@@ -419,6 +483,15 @@ def run(args):
    for file in args.files:
        parsed_files.append(SubripFile(file))

+    if args.cut_out:
+        if args.begin:
+            for file in parsed_files:
+                file.trunc_before(args.begin)
+
+        if args.end:
+            for file in parsed_files:
+                file.trunc_after(args.end)
+
    if args.join:
        first = parsed_files.pop(0)
        while True:
@@ -429,7 +502,6 @@ def run(args):
            except IndexError:
                break
        parsed_files.append(first)
-        first.renumber()

    for file in parsed_files:
        file.process(args, config)
--- a/tests/integration.py
+++ b/tests/integration.py
@@ -9,39 +9,39 @@ from pathlib import Path
 class TestFsub(unittest.TestCase):
    samples = Path('tests/samples')

-    def run_on(self, args, samples, ofiles, replace=False):
+    def run_on(self, args, samples, expect_out_files, replace=False):
        caller = inspect.stack()[1][3]
-        ifiles = []
+        cloned_samples = []

        samples = map(lambda s: str(self.samples / s) + '.srt', samples)
        i = 1
        for sample in samples:
-            ifile = str(i) + '.' + caller + '.srt'
-            shutil.copy(sample, ifile)
-            args.append(ifile)
-            ifiles.append(ifile)
+            cloned_sample = str(i) + '.' + caller + '.srt'
+            shutil.copy(sample, cloned_sample)
+            args.append(cloned_sample)
+            cloned_samples.append(cloned_sample)
            i += 1

        fsub.run(args)

-        limit = len(ofiles)
-        for i, ifile in enumerate(ifiles):
+        limit = len(expect_out_files)
+        for i, cloned_sample in enumerate(cloned_samples):
            if i < limit:
                if not replace:
-                    os.remove(ifile)
-                    ifile = 'out-' + ifile
-                out = open(ifile)
+                    os.remove(cloned_sample)
+                    cloned_sample = 'out-' + cloned_sample
+                out = open(cloned_sample)
                result = out.read()
                out.close()

-                ofile = str(self.samples / ofiles[i]) + '.srt'
-                cmp_file = open(ofile)
-                cmp = cmp_file.read()
-                cmp_file.close()
+                expect_out_file = str(self.samples/expect_out_files[i])+'.srt'
+                expect_out_file = open(expect_out_file)
+                expect_out = expect_out_file.read()
+                expect_out_file.close()

-                self.assertEqual(result, cmp)
+                self.assertEqual(result, expect_out)
            try:
-                os.remove(ifile)
+                os.remove(cloned_sample)
            except FileNotFoundError:
                pass

@@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase):
        args = ['-f', str(self.samples / 'blacklist')]
        self.run_on(args, ['sample1'], ['sample1-cleaned'])

+    def test_cleaned_begin(self):
+        args = ['-f', str(self.samples / 'blacklist'), '-b', '3']
+        self.run_on(args, ['sample1'], ['sample1-cleaned-begin'])
+
    def test_stripped(self):
        self.run_on(['-n'], ['sample1'], ['sample1-stripped'])

+    def test_stripped_end(self):
+        args = ['-n', '-e', '00:00:55,500']
+        self.run_on(args, ['sample1'], ['sample1-stripped-end'])
+
    def test_cleaned_stripped(self):
        args = ['-c', '-f', str(self.samples / 'blacklist'), '-n']
        self.run_on(args, ['sample1'], ['sample1-cleaned-stripped'])
@@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase):
        args = ['-s', '-52000']
        self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s'])

+    def test_shifted_minus_1h_begin(self):
+        args = ['-s', '-3600000', '-b', '00:00:53,500']
+        self.run_on(args, ['sample1'], ['sample1-shifted-minus-1h-begin'])
+
    def test_joined(self):
        args = ['-j']
        self.run_on(args, ['sample1', 'sample2', 'sample3'],
                          ['sample1-sample2-sample3-joined'])

+    def test_cut_begin(self):
+        args = ['-b', '2', '-u']
+        self.run_on(args, ['sample1'], ['sample1-cut-out-begin'])
+
+    def test_cut_end(self):
+        args = ['-e', '1', '-u']
+        self.run_on(args, ['sample1'], ['sample1-cut-out-end'])
+
+    def test_cut_begin_end(self):
+        args = ['-b', '2', '-e', '4', '-u']
+        self.run_on(args, ['sample1'], ['sample1-cut-out-begin-end'])
+
+    def test_cut_end_joined(self):
+        args = ['-e', '1', '-u', '-j']
+        self.run_on(args, ['sample1', 'sample3'],
+                          ['sample1-sample3-cut-out-end-joined'])
+

 if __name__ == '__main__':
    unittest.main()
--- a/tests/samples/sample1-cleaned-begin.srt
+++ b/tests/samples/sample1-cleaned-begin.srt
@@ -0,0 +1,17 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
+
+2
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
+
+3
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
+
+4
+00:00:58,100 --> 00:00:59,600
+<ul>Including this one!</ul>
--- a/tests/samples/sample1-cut-out-begin-end.srt
+++ b/tests/samples/sample1-cut-out-begin-end.srt
@@ -0,0 +1,14 @@
+1
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
+
+2
+00:00:53,500 --> 00:00:55,200
+<html>The script should not
+care whether the tag is
+valid or not</html>
+
+3
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
--- a/tests/samples/sample1-cut-out-begin.srt
+++ b/tests/samples/sample1-cut-out-begin.srt
@@ -0,0 +1,18 @@
+1
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
+
+2
+00:00:53,500 --> 00:00:55,200
+<html>The script should not
+care whether the tag is
+valid or not</html>
+
+3
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
+
+4
+00:00:58,100 --> 00:00:59,600
+<ul>Including this one!</ul>
--- a/tests/samples/sample1-cut-out-end.srt
+++ b/tests/samples/sample1-cut-out-end.srt
@@ -0,0 +1,4 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
--- a/tests/samples/sample1-sample3-cut-out-end-joined.srt
+++ b/tests/samples/sample1-sample3-cut-out-end-joined.srt
@@ -0,0 +1,8 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
+
+2
+10:03:49,800 --> 10:05:02,000
+This one has even more whitespace!
--- a/tests/samples/sample1-shifted-minus-1h-begin.srt
+++ b/tests/samples/sample1-shifted-minus-1h-begin.srt
@@ -0,0 +1,8 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
+
+2
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
--- a/tests/samples/sample1-stripped-end.srt
+++ b/tests/samples/sample1-stripped-end.srt
@@ -0,0 +1,23 @@
+1
+00:00:48,900 --> 00:00:49,800
+This one is full of HTML tags.
+Above, below, everywhere
+
+2
+00:00:51,800 --> 00:00:52,700
+Even 's!
+
+3
+00:00:53,500 --> 00:00:55,200
+The script should not
+care whether the tag is
+valid or not
+
+4
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
+
+5
+00:00:58,100 --> 00:00:59,600
+<ul>Including this one!</ul>