From d9c25e1b27a3382dca70def99beff788f3773b2b Mon Sep 17 00:00:00 2001
From: Augusto Gunsch <augustogunsch@tutanota.com>
Date: Wed, 17 Nov 2021 20:45:38 -0300
Subject: [PATCH] Implement new flags

---
 README.md                                     |  24 ++-
 setup.cfg                                     |   2 +-
 src/fsub/fsub.py                              | 176 ++++++++++++------
 tests/integration.py                          |  63 +++++--
 tests/samples/sample1-cleaned-begin.srt       |  17 ++
 tests/samples/sample1-cut-out-begin-end.srt   |  14 ++
 tests/samples/sample1-cut-out-begin.srt       |  18 ++
 tests/samples/sample1-cut-out-end.srt         |   4 +
 .../sample1-sample3-cut-out-end-joined.srt    |   8 +
 .../sample1-shifted-minus-1h-begin.srt        |   8 +
 tests/samples/sample1-stripped-end.srt        |  23 +++
 11 files changed, 282 insertions(+), 75 deletions(-)
 create mode 100644 tests/samples/sample1-cleaned-begin.srt
 create mode 100644 tests/samples/sample1-cut-out-begin-end.srt
 create mode 100644 tests/samples/sample1-cut-out-begin.srt
 create mode 100644 tests/samples/sample1-cut-out-end.srt
 create mode 100644 tests/samples/sample1-sample3-cut-out-end-joined.srt
 create mode 100644 tests/samples/sample1-shifted-minus-1h-begin.srt
 create mode 100644 tests/samples/sample1-stripped-end.srt
diff --git a/README.md b/README.md
index 52f7bdb..af4c426 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,6 @@ pip install fsub
 
 # Usage
 ```
-usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...]
-
 Fix, edit and clean SubRip (.srt) files.
 
 positional arguments:
@@ -18,14 +16,30 @@ positional arguments:
 
 optional arguments:
   -h, --help         show this help message and exit
+  -f F, --config F   use F as the config file (by default, on Unix it is:
+                     "$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc")
+  -r, --replace      edit files in-place (--join will delete joined files too), instead of the
+                     default behavior of outputting results into files prefixed with "out-"
+  -p, --stdout       dump results to stdout, and do not edit nor write any file
+
+processing:
+  Flags that specify an action to be taken. Many may be specified.
+
   -c, --clean        remove subtitles matching regular expressions listed in the config file
                      (this is the default behavior if no other flag is passed)
   -s MS, --shift MS  shift all subtitles by MS milliseconds, which may be positive or negative
   -n, --no-html      strip HTML tags from subtitles content
-  -f F, --config F   use F as the config file (by default, F is: on Unix:
-                     $HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc)
   -j, --join         join all files into the first, shifting their time accordingly
-  -r, --replace      edit files in-place (-j will delete joined files too)
+  -u, --cut-out      cut the specified section from the file(s) into new files
+
+sectioning:
+  Flags that specify a section to work in. They accept either a subtitle number or a time
+  stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours,
+  minutes, seconds are 2-zero padded while milliseconds is 3-zero padded). fsub will not
+  modify subtitles outside this range, except while joining the files.
+
+  -b B, --begin B    specify section beginning (inclusive)
+  -e E, --end E      specify section end (inclusive)
 ```
 
 # Testing
diff --git a/setup.cfg b/setup.cfg
index 26f2a07..c0233f4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = fsub
-version = 0.1.3
+version = 1.0.0
 author = Augusto Lenz Gunsch
 author_email = augustogunsch@tutanota.com
 description = CLI SubRip editor
diff --git a/src/fsub/fsub.py b/src/fsub/fsub.py
index f61910b..3439021 100755
--- a/src/fsub/fsub.py
+++ b/src/fsub/fsub.py
@@ -96,6 +96,30 @@ class TimeStamp:
          (self.hours, self.minutes, self.seconds, self.millisecods)
 
 
+class SectionMarker:  # section boundary: a TimeStamp or a subtitle number
+    def __init__(self, arg):
+        try:
+            self.marker = TimeStamp(arg)
+        except Exception:  # not a time stamp; try a subtitle number
+            try:
+                self.marker = int(arg)
+            except (TypeError, ValueError):
+                panic('Invalid section marker argument', 1)
+
+    def include_after(self, other):  # other starts at/after the marker?
+        if isinstance(self.marker, TimeStamp):
+            return other.time_start >= self.marker
+        return other.number >= self.marker
+
+    def include_before(self, other):  # other ends at/before the marker?
+        if isinstance(self.marker, TimeStamp):
+            return other.time_end <= self.marker
+        return other.number <= self.marker
+
+    def __le__(self, other):  # compare wrapped values; no __int__ exists
+        return self.marker <= other.marker
+
+
 class Subtitle:
     # Parse a single subtitle
     def __init__(self, lines, file_name, line_number):
@@ -235,18 +259,10 @@ class SubripFile:
         self.subs += other.subs
         return self
 
-    def clean(self, expressions):
-        if len(expressions) == 0:
-            return
-
-        # Remove lines matching any expression
-        for regexp in expressions:
-            self.subs = [sub for sub in self.subs if not sub.matches(regexp)]
-
     def shift(self, ms):
         for sub in self.subs:
             sub.shift(ms)
-        self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
+        self.subs = [sub for sub in self.subs if sub.time_start >= 0]
 
     def strip_html(self):
         p = re.compile('<.+?>')
@@ -260,26 +276,54 @@ class SubripFile:
             i += 1
 
     def process(self, args, config):
-        if args.clean:
-            self.clean(config.expressions)
+        html_regex = re.compile('<.+?>')
+        new_subs = []
+        for sub in self.subs:
+            if args.begin and not args.begin.include_after(sub):
+                new_subs.append(sub)
+                continue
 
-        if args.shift:
-            self.shift(args.shift)
+            if args.end and not args.end.include_before(sub):
+                new_subs.append(sub)
+                continue
 
-        if args.no_html:
-            self.strip_html()
+            if args.clean and len(config.expressions) > 0:
+                if any(sub.matches(regex) for regex in config.expressions):
+                    continue
+
+            if args.shift:
+                sub.shift(args.shift)
+                if sub.time_start < 0:
+                    continue
+
+            if args.no_html:
+                sub.replace(html_regex, '')
+
+            new_subs.append(sub)
+
+        self.subs = new_subs
 
-        self.renumber()
         self.write_file(args.replace)
 
     def write_file(self, in_place=False, stdout=False):
+        self.renumber()
+
         if stdout:
             print(self)
             return
 
-        file = self.file_name if in_place else 'out-' + self.file_name
+        try:
+            if in_place:
+                path = self.file_name
+                output = open(path, 'w', encoding='utf-8')
+            else:
+                path = Path(self.file_name)
+                path = path.with_name('out-' + path.name)
+                output = path.open(mode='w', encoding='utf-8')
+        except PermissionError:
+            panic('Can\'t access file {}: Permission denied'
+                  .format(path), 1)
 
-        output = open(file, 'w', encoding='utf-8')
         output.write(repr(self))
 
         if len(self.subs) > 0:
@@ -291,6 +335,12 @@ class SubripFile:
         os.remove(self.file_name)
         del self
 
+    def trunc_before(self, marker):
+        self.subs = [sub for sub in self.subs if marker.include_after(sub)]
+
+    def trunc_after(self, marker):
+        self.subs = [sub for sub in self.subs if marker.include_before(sub)]
+
     def __repr__(self):
         return '\n\n'.join(map(repr, self.subs))
 
@@ -302,29 +352,6 @@ def parse_args(args):
         add_help=True
     )
 
-    parser.add_argument(
-        '-c', '--clean',
-        help='remove subtitles matching regular expressions ' +
-             'listed in the config file (this is the default ' +
-             'behavior if no other flag is passed)',
-        action='store_true'
-    )
-
-    parser.add_argument(
-        '-s', '--shift',
-        help='shift all subtitles by MS milliseconds, which ' +
-             'may be positive or negative',
-        metavar='MS',
-        action='store',
-        type=int
-    )
-
-    parser.add_argument(
-        '-n', '--no-html',
-        help='strip HTML tags from subtitles content',
-        action='store_true'
-    )
-
     # Requires --clean
     parser.add_argument(
         '-f', '--config',
@@ -335,18 +362,45 @@ def parse_args(args):
         type=argparse.FileType('r')
     )
 
-    parser.add_argument(
+    processing = parser.add_argument_group(
+        'processing',
+        'Flags that specify an action to be taken. Many may ' +
+        'be specified.'
+    )
+
+    processing.add_argument(
+        '-c', '--clean',
+        help='remove subtitles matching regular expressions ' +
+             'listed in the config file (this is the default ' +
+             'behavior if no other flag is passed)',
+        action='store_true'
+    )
+
+    processing.add_argument(
+        '-s', '--shift',
+        help='shift all subtitles by MS milliseconds, which ' +
+             'may be positive or negative',
+        metavar='MS',
+        action='store',
+        type=int
+    )
+
+    processing.add_argument(
+        '-n', '--no-html',
+        help='strip HTML tags from subtitles content',
+        action='store_true'
+    )
+
+    processing.add_argument(
         '-j', '--join',
         help='join all files into the first, shifting their time accordingly',
         action='store_true'
     )
 
-    # Requires --begin
-    parser.add_argument(
+    # Requires --begin or --end, may have both
+    processing.add_argument(
         '-u', '--cut-out',
-        help='cut out the specified section from the file(s), creating ' +
-             'for every input file a new one prefixed with "cut-" ' +
-             '(--join will join both the input files and the cutouts)',
+        help='cut the specified section from the file(s) into new files',
         action='store_true'
     )
 
@@ -385,29 +439,39 @@ def parse_args(args):
 
     section.add_argument(
         '-b', '--begin',
-        help='specify section beginning (by default, beginning of file)',
+        help='specify section beginning (inclusive)',
         metavar='B',
         action='store'
     )
 
     section.add_argument(
         '-e', '--end',
-        help='specify section end (by default, end of file)',
+        help='specify section end (inclusive)',
         metavar='E',
         action='store'
     )
 
     args = parser.parse_args(args)
 
+    # Flags that require section
+    if args.cut_out:
+        if not args.begin and not args.end:
+            panic('You must specify a section to work with', 1)
+
     # Make sure --clean is the default
-    # TODO: account for new options
-    if not args.shift and not args.no_html:
+    if not any((args.shift, args.no_html, args.join, args.cut_out)):
         args.clean = True
 
     # Validate options
     if not args.clean and args.config:
         panic('-f requires -c', 1)
 
+    if args.begin:
+        args.begin = SectionMarker(args.begin)
+
+    if args.end:
+        args.end = SectionMarker(args.end)
+
     return args
 
 
@@ -419,6 +483,15 @@ def run(args):
     for file in args.files:
         parsed_files.append(SubripFile(file))
 
+    if args.cut_out:
+        if args.begin:
+            for file in parsed_files:
+                file.trunc_before(args.begin)
+
+        if args.end:
+            for file in parsed_files:
+                file.trunc_after(args.end)
+
     if args.join:
         first = parsed_files.pop(0)
         while True:
@@ -429,7 +502,6 @@ def run(args):
             except IndexError:
                 break
         parsed_files.append(first)
-        first.renumber()
 
     for file in parsed_files:
         file.process(args, config)
diff --git a/tests/integration.py b/tests/integration.py
index b27d4d0..48af708 100644
--- a/tests/integration.py
+++ b/tests/integration.py
@@ -9,39 +9,39 @@ from pathlib import Path
 class TestFsub(unittest.TestCase):
     samples = Path('tests/samples')
 
-    def run_on(self, args, samples, ofiles, replace=False):
+    def run_on(self, args, samples, expect_out_files, replace=False):
         caller = inspect.stack()[1][3]
-        ifiles = []
+        cloned_samples = []
 
         samples = map(lambda s: str(self.samples / s) + '.srt', samples)
         i = 1
         for sample in samples:
-            ifile = str(i) + '.' + caller + '.srt'
-            shutil.copy(sample, ifile)
-            args.append(ifile)
-            ifiles.append(ifile)
+            cloned_sample = str(i) + '.' + caller + '.srt'
+            shutil.copy(sample, cloned_sample)
+            args.append(cloned_sample)
+            cloned_samples.append(cloned_sample)
             i += 1
 
         fsub.run(args)
 
-        limit = len(ofiles)
-        for i, ifile in enumerate(ifiles):
+        limit = len(expect_out_files)
+        for i, cloned_sample in enumerate(cloned_samples):
             if i < limit:
                 if not replace:
-                    os.remove(ifile)
-                    ifile = 'out-' + ifile
-                out = open(ifile)
+                    os.remove(cloned_sample)
+                    cloned_sample = 'out-' + cloned_sample
+                out = open(cloned_sample)
                 result = out.read()
                 out.close()
 
-                ofile = str(self.samples / ofiles[i]) + '.srt'
-                cmp_file = open(ofile)
-                cmp = cmp_file.read()
-                cmp_file.close()
+                expect_out_file = str(self.samples/expect_out_files[i])+'.srt'
+                expect_out_file = open(expect_out_file)
+                expect_out = expect_out_file.read()
+                expect_out_file.close()
 
-                self.assertEqual(result, cmp)
+                self.assertEqual(result, expect_out)
             try:
-                os.remove(ifile)
+                os.remove(cloned_sample)
             except FileNotFoundError:
                 pass
 
@@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase):
         args = ['-f', str(self.samples / 'blacklist')]
         self.run_on(args, ['sample1'], ['sample1-cleaned'])
 
+    def test_cleaned_begin(self):
+        args = ['-f', str(self.samples / 'blacklist'), '-b', '3']
+        self.run_on(args, ['sample1'], ['sample1-cleaned-begin'])
+
     def test_stripped(self):
         self.run_on(['-n'], ['sample1'], ['sample1-stripped'])
 
+    def test_stripped_end(self):
+        args = ['-n', '-e', '00:00:55,500']
+        self.run_on(args, ['sample1'], ['sample1-stripped-end'])
+
     def test_cleaned_stripped(self):
         args = ['-c', '-f', str(self.samples / 'blacklist'), '-n']
         self.run_on(args, ['sample1'], ['sample1-cleaned-stripped'])
@@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase):
         args = ['-s', '-52000']
         self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s'])
 
+    def test_shifted_minus_1h_begin(self):
+        args = ['-s', '-3600000', '-b', '00:00:53,500']
+        self.run_on(args, ['sample1'], ['sample1-shifted-minus-1h-begin'])
+
     def test_joined(self):
         args = ['-j']
         self.run_on(args, ['sample1', 'sample2', 'sample3'],
                           ['sample1-sample2-sample3-joined'])
 
+    def test_cut_begin(self):
+        args = ['-b', '2', '-u']
+        self.run_on(args, ['sample1'], ['sample1-cut-out-begin'])
+
+    def test_cut_end(self):
+        args = ['-e', '1', '-u']
+        self.run_on(args, ['sample1'], ['sample1-cut-out-end'])
+
+    def test_cut_begin_end(self):
+        args = ['-b', '2', '-e', '4', '-u']
+        self.run_on(args, ['sample1'], ['sample1-cut-out-begin-end'])
+
+    def test_cut_end_joined(self):
+        args = ['-e', '1', '-u', '-j']
+        self.run_on(args, ['sample1', 'sample3'],
+                          ['sample1-sample3-cut-out-end-joined'])
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/samples/sample1-cleaned-begin.srt b/tests/samples/sample1-cleaned-begin.srt
new file mode 100644
index 0000000..ed91c54
--- /dev/null
+++ b/tests/samples/sample1-cleaned-begin.srt
@@ -0,0 +1,17 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
+
+2
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
+
+3
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
+
+4
+00:00:58,100 --> 00:00:59,600
+<ul>Including this one!</ul>
diff --git a/tests/samples/sample1-cut-out-begin-end.srt b/tests/samples/sample1-cut-out-begin-end.srt
new file mode 100644
index 0000000..67c3d79
--- /dev/null
+++ b/tests/samples/sample1-cut-out-begin-end.srt
@@ -0,0 +1,14 @@
+1
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
+
+2
+00:00:53,500 --> 00:00:55,200
+<html>The script should not
+care whether the tag is
+valid or not</html>
+
+3
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
diff --git a/tests/samples/sample1-cut-out-begin.srt b/tests/samples/sample1-cut-out-begin.srt
new file mode 100644
index 0000000..3149f04
--- /dev/null
+++ b/tests/samples/sample1-cut-out-begin.srt
@@ -0,0 +1,18 @@
+1
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
+
+2
+00:00:53,500 --> 00:00:55,200
+<html>The script should not
+care whether the tag is
+valid or not</html>
+
+3
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
+
+4
+00:00:58,100 --> 00:00:59,600
+<ul>Including this one!</ul>
diff --git a/tests/samples/sample1-cut-out-end.srt b/tests/samples/sample1-cut-out-end.srt
new file mode 100644
index 0000000..d60785a
--- /dev/null
+++ b/tests/samples/sample1-cut-out-end.srt
@@ -0,0 +1,4 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
diff --git a/tests/samples/sample1-sample3-cut-out-end-joined.srt b/tests/samples/sample1-sample3-cut-out-end-joined.srt
new file mode 100644
index 0000000..1f028cb
--- /dev/null
+++ b/tests/samples/sample1-sample3-cut-out-end-joined.srt
@@ -0,0 +1,8 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
+
+2
+10:03:49,800 --> 10:05:02,000
+This one has even more whitespace!
diff --git a/tests/samples/sample1-shifted-minus-1h-begin.srt b/tests/samples/sample1-shifted-minus-1h-begin.srt
new file mode 100644
index 0000000..2b09ba4
--- /dev/null
+++ b/tests/samples/sample1-shifted-minus-1h-begin.srt
@@ -0,0 +1,8 @@
+1
+00:00:48,900 --> 00:00:49,800
+<b>This one is full of HTML tags.</b>
+<i>Above, below, everywhere</i>
+
+2
+00:00:51,800 --> 00:00:52,700
+<a href='dummy'>Even <a>'s!</a>
diff --git a/tests/samples/sample1-stripped-end.srt b/tests/samples/sample1-stripped-end.srt
new file mode 100644
index 0000000..b68798d
--- /dev/null
+++ b/tests/samples/sample1-stripped-end.srt
@@ -0,0 +1,23 @@
+1
+00:00:48,900 --> 00:00:49,800
+This one is full of HTML tags.
+Above, below, everywhere
+
+2
+00:00:51,800 --> 00:00:52,700
+Even 's!
+
+3
+00:00:53,500 --> 00:00:55,200
+The script should not
+care whether the tag is
+valid or not
+
+4
+00:00:56,000 --> 00:00:57,000
+<p>It should just strip all of
+them mercilessly</p>
+
+5
+00:00:58,100 --> 00:00:59,600
+<ul>Including this one!</ul>