From d9c25e1b27a3382dca70def99beff788f3773b2b Mon Sep 17 00:00:00 2001 From: Augusto Gunsch Date: Wed, 17 Nov 2021 20:45:38 -0300 Subject: [PATCH] Implement new flags --- README.md | 24 ++- setup.cfg | 2 +- src/fsub/fsub.py | 176 ++++++++++++------ tests/integration.py | 63 +++++-- tests/samples/sample1-cleaned-begin.srt | 17 ++ tests/samples/sample1-cut-out-begin-end.srt | 14 ++ tests/samples/sample1-cut-out-begin.srt | 18 ++ tests/samples/sample1-cut-out-end.srt | 4 + .../sample1-sample3-cut-out-end-joined.srt | 8 + .../sample1-shifted-minus-1h-begin.srt | 8 + tests/samples/sample1-stripped-end.srt | 23 +++ 11 files changed, 282 insertions(+), 75 deletions(-) create mode 100644 tests/samples/sample1-cleaned-begin.srt create mode 100644 tests/samples/sample1-cut-out-begin-end.srt create mode 100644 tests/samples/sample1-cut-out-begin.srt create mode 100644 tests/samples/sample1-cut-out-end.srt create mode 100644 tests/samples/sample1-sample3-cut-out-end-joined.srt create mode 100644 tests/samples/sample1-shifted-minus-1h-begin.srt create mode 100644 tests/samples/sample1-stripped-end.srt diff --git a/README.md b/README.md index 52f7bdb..af4c426 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,6 @@ pip install fsub # Usage ``` -usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...] - Fix, edit and clean SubRip (.srt) files. positional arguments: @@ -18,14 +16,30 @@ positional arguments: optional arguments: -h, --help show this help message and exit + -f F, --config F use F as the config file (by default, on Unix it is: + "$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc") + -r, --replace edit files in-place (--join will delete joined files too), instead of the + default behavior of outputting results into files prefixed with "out-" + -p, --stdout dump results to stdout, and do not edit nor write any file + +processing: + Flags that specify an action to be taken. Many may be specified. 
+ -c, --clean remove subtitles matching regular expressions listed in the config file (this is the default behavior if no other flag is passed) -s MS, --shift MS shift all subtitles by MS milliseconds, which may be positive or negative -n, --no-html strip HTML tags from subtitles content - -f F, --config F use F as the config file (by default, F is: on Unix: - $HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc) -j, --join join all files into the first, shifting their time accordingly - -r, --replace edit files in-place (-j will delete joined files too) + -u, --cut-out cut the specified section from the file(s) into new files + +sectioning: + Flags that specify a section to work in. They accept either a subtitle number or a time + stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours, + minutes, seconds are 2-zero padded while milliseconds is 3-zero padded). fsub will not + modify subtitles outside this range, except while joining the files. + + -b B, --begin B specify section beginning (inclusive) + -e E, --end E specify section end (inclusive) ``` # Testing diff --git a/setup.cfg b/setup.cfg index 26f2a07..c0233f4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = fsub -version = 0.1.3 +version = 1.0.0 author = Augusto Lenz Gunsch author_email = augustogunsch@tutanota.com description = CLI SubRip editor diff --git a/src/fsub/fsub.py b/src/fsub/fsub.py index f61910b..3439021 100755 --- a/src/fsub/fsub.py +++ b/src/fsub/fsub.py @@ -96,6 +96,30 @@ class TimeStamp: (self.hours, self.minutes, self.seconds, self.millisecods) +class SectionMarker: + def __init__(self, arg): + try: + self.marker = TimeStamp(arg) + except Exception: + try: + self.marker = int(arg) + except Exception: + panic('Invalid section marker argument', 1) + + def include_after(self, other): + if type(self.marker) is TimeStamp: + return other.time_start >= self.marker + return other.number >= self.marker + + def include_before(self, other): + if type(self.marker) is TimeStamp: + return 
other.time_end <= self.marker + return other.number <= self.marker + + def __le__(self, other): + return int(self) <= int(other) + + class Subtitle: # Parse a single subtitle def __init__(self, lines, file_name, line_number): @@ -235,18 +259,10 @@ class SubripFile: self.subs += other.subs return self - def clean(self, expressions): - if len(expressions) == 0: - return - - # Remove lines matching any expression - for regexp in expressions: - self.subs = [sub for sub in self.subs if not sub.matches(regexp)] - def shift(self, ms): for sub in self.subs: sub.shift(ms) - self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs)) + self.subs = [sub for sub in self.subs if sub.time_start >= 0] def strip_html(self): p = re.compile('<.+?>') @@ -260,26 +276,54 @@ class SubripFile: i += 1 def process(self, args, config): - if args.clean: - self.clean(config.expressions) + html_regex = re.compile('<.+?>') + new_subs = [] + for sub in self.subs: + if args.begin and not args.begin.include_after(sub): + new_subs.append(sub) + continue - if args.shift: - self.shift(args.shift) + if args.end and not args.end.include_before(sub): + new_subs.append(sub) + continue - if args.no_html: - self.strip_html() + if args.clean and len(config.expressions) > 0: + if any(sub.matches(regex) for regex in config.expressions): + continue + + if args.shift: + sub.shift(args.shift) + if sub.time_start < 0: + continue + + if args.no_html: + sub.replace(html_regex, '') + + new_subs.append(sub) + + self.subs = new_subs - self.renumber() self.write_file(args.replace) def write_file(self, in_place=False, stdout=False): + self.renumber() + if stdout: print(self) return - file = self.file_name if in_place else 'out-' + self.file_name + try: + if in_place: + path = self.file_name + output = open(path, 'w', encoding='utf-8') + else: + path = Path(self.file_name) + path = path.with_name('out-' + path.name) + output = path.open(mode='w', encoding='utf-8') + except PermissionError: + panic('Can\'t access 
file {}: Permission denied' + .format(path), 1) - output = open(file, 'w', encoding='utf-8') output.write(repr(self)) if len(self.subs) > 0: @@ -291,6 +335,12 @@ class SubripFile: os.remove(self.file_name) del self + def trunc_before(self, marker): + self.subs = [sub for sub in self.subs if marker.include_after(sub)] + + def trunc_after(self, marker): + self.subs = [sub for sub in self.subs if marker.include_before(sub)] + def __repr__(self): return '\n\n'.join(map(repr, self.subs)) @@ -302,29 +352,6 @@ def parse_args(args): add_help=True ) - parser.add_argument( - '-c', '--clean', - help='remove subtitles matching regular expressions ' + - 'listed in the config file (this is the default ' + - 'behavior if no other flag is passed)', - action='store_true' - ) - - parser.add_argument( - '-s', '--shift', - help='shift all subtitles by MS milliseconds, which ' + - 'may be positive or negative', - metavar='MS', - action='store', - type=int - ) - - parser.add_argument( - '-n', '--no-html', - help='strip HTML tags from subtitles content', - action='store_true' - ) - # Requires --clean parser.add_argument( '-f', '--config', @@ -335,18 +362,45 @@ def parse_args(args): type=argparse.FileType('r') ) - parser.add_argument( + processing = parser.add_argument_group( + 'processing', + 'Flags that specify an action to be taken. Many may ' + + 'be specified.' 
+ ) + + processing.add_argument( + '-c', '--clean', + help='remove subtitles matching regular expressions ' + + 'listed in the config file (this is the default ' + + 'behavior if no other flag is passed)', + action='store_true' + ) + + processing.add_argument( + '-s', '--shift', + help='shift all subtitles by MS milliseconds, which ' + + 'may be positive or negative', + metavar='MS', + action='store', + type=int + ) + + processing.add_argument( + '-n', '--no-html', + help='strip HTML tags from subtitles content', + action='store_true' + ) + + processing.add_argument( '-j', '--join', help='join all files into the first, shifting their time accordingly', action='store_true' ) - # Requires --begin - parser.add_argument( + # Requires --begin or --end, may have both + processing.add_argument( '-u', '--cut-out', - help='cut out the specified section from the file(s), creating ' + - 'for every input file a new one prefixed with "cut-" ' + - '(--join will join both the input files and the cutouts)', + help='cut the specified section from the file(s) into new files', action='store_true' ) @@ -385,29 +439,39 @@ def parse_args(args): section.add_argument( '-b', '--begin', - help='specify section beginning (by default, beginning of file)', + help='specify section beginning (inclusive)', metavar='B', action='store' ) section.add_argument( '-e', '--end', - help='specify section end (by default, end of file)', + help='specify section end (inclusive)', metavar='E', action='store' ) args = parser.parse_args(args) + # Flags that require section + if args.cut_out: + if not args.begin and not args.end: + panic('You must specify a section to work with', 1) + # Make sure --clean is the default - # TODO: account for new options - if not args.shift and not args.no_html: + if not any((args.shift, args.no_html, args.join, args.cut_out)): args.clean = True # Validate options if not args.clean and args.config: panic('-f requires -c', 1) + if args.begin: + args.begin = 
SectionMarker(args.begin) + + if args.end: + args.end = SectionMarker(args.end) + return args @@ -419,6 +483,15 @@ def run(args): for file in args.files: parsed_files.append(SubripFile(file)) + if args.cut_out: + if args.begin: + for file in parsed_files: + file.trunc_before(args.begin) + + if args.end: + for file in parsed_files: + file.trunc_after(args.end) + if args.join: first = parsed_files.pop(0) while True: @@ -429,7 +502,6 @@ def run(args): except IndexError: break parsed_files.append(first) - first.renumber() for file in parsed_files: file.process(args, config) diff --git a/tests/integration.py b/tests/integration.py index b27d4d0..48af708 100644 --- a/tests/integration.py +++ b/tests/integration.py @@ -9,39 +9,39 @@ from pathlib import Path class TestFsub(unittest.TestCase): samples = Path('tests/samples') - def run_on(self, args, samples, ofiles, replace=False): + def run_on(self, args, samples, expect_out_files, replace=False): caller = inspect.stack()[1][3] - ifiles = [] + cloned_samples = [] samples = map(lambda s: str(self.samples / s) + '.srt', samples) i = 1 for sample in samples: - ifile = str(i) + '.' + caller + '.srt' - shutil.copy(sample, ifile) - args.append(ifile) - ifiles.append(ifile) + cloned_sample = str(i) + '.' 
+ caller + '.srt' + shutil.copy(sample, cloned_sample) + args.append(cloned_sample) + cloned_samples.append(cloned_sample) i += 1 fsub.run(args) - limit = len(ofiles) - for i, ifile in enumerate(ifiles): + limit = len(expect_out_files) + for i, cloned_sample in enumerate(cloned_samples): if i < limit: if not replace: - os.remove(ifile) - ifile = 'out-' + ifile - out = open(ifile) + os.remove(cloned_sample) + cloned_sample = 'out-' + cloned_sample + out = open(cloned_sample) result = out.read() out.close() - ofile = str(self.samples / ofiles[i]) + '.srt' - cmp_file = open(ofile) - cmp = cmp_file.read() - cmp_file.close() + expect_out_file = str(self.samples/expect_out_files[i])+'.srt' + expect_out_file = open(expect_out_file) + expect_out = expect_out_file.read() + expect_out_file.close() - self.assertEqual(result, cmp) + self.assertEqual(result, expect_out) try: - os.remove(ifile) + os.remove(cloned_sample) except FileNotFoundError: pass @@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase): args = ['-f', str(self.samples / 'blacklist')] self.run_on(args, ['sample1'], ['sample1-cleaned']) + def test_cleaned_begin(self): + args = ['-f', str(self.samples / 'blacklist'), '-b', '3'] + self.run_on(args, ['sample1'], ['sample1-cleaned-begin']) + def test_stripped(self): self.run_on(['-n'], ['sample1'], ['sample1-stripped']) + def test_stripped_end(self): + args = ['-n', '-e', '00:00:55,500'] + self.run_on(args, ['sample1'], ['sample1-stripped-end']) + def test_cleaned_stripped(self): args = ['-c', '-f', str(self.samples / 'blacklist'), '-n'] self.run_on(args, ['sample1'], ['sample1-cleaned-stripped']) @@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase): args = ['-s', '-52000'] self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s']) + def test_shifted_minus_1h_begin(self): + args = ['-s', '-3600000', '-b', '00:00:53,500'] + self.run_on(args, ['sample1'], ['sample1-shifted-minus-1h-begin']) + def test_joined(self): args = ['-j'] self.run_on(args, ['sample1', 
'sample2', 'sample3'], ['sample1-sample2-sample3-joined']) + def test_cut_begin(self): + args = ['-b', '2', '-u'] + self.run_on(args, ['sample1'], ['sample1-cut-out-begin']) + + def test_cut_end(self): + args = ['-e', '1', '-u'] + self.run_on(args, ['sample1'], ['sample1-cut-out-end']) + + def test_cut_begin_end(self): + args = ['-b', '2', '-e', '4', '-u'] + self.run_on(args, ['sample1'], ['sample1-cut-out-begin-end']) + + def test_cut_end_joined(self): + args = ['-e', '1', '-u', '-j'] + self.run_on(args, ['sample1', 'sample3'], + ['sample1-sample3-cut-out-end-joined']) + if __name__ == '__main__': unittest.main() diff --git a/tests/samples/sample1-cleaned-begin.srt b/tests/samples/sample1-cleaned-begin.srt new file mode 100644 index 0000000..ed91c54 --- /dev/null +++ b/tests/samples/sample1-cleaned-begin.srt @@ -0,0 +1,17 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:51,800 --> 00:00:52,700 +Even 's! + +3 +00:00:56,000 --> 00:00:57,000 +

It should just strip all of +them mercilessly

+ +4 +00:00:58,100 --> 00:00:59,600 +
    Including this one!
diff --git a/tests/samples/sample1-cut-out-begin-end.srt b/tests/samples/sample1-cut-out-begin-end.srt new file mode 100644 index 0000000..67c3d79 --- /dev/null +++ b/tests/samples/sample1-cut-out-begin-end.srt @@ -0,0 +1,14 @@ +1 +00:00:51,800 --> 00:00:52,700 +Even 's! + +2 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +3 +00:00:56,000 --> 00:00:57,000 +

It should just strip all of +them mercilessly

diff --git a/tests/samples/sample1-cut-out-begin.srt b/tests/samples/sample1-cut-out-begin.srt new file mode 100644 index 0000000..3149f04 --- /dev/null +++ b/tests/samples/sample1-cut-out-begin.srt @@ -0,0 +1,18 @@ +1 +00:00:51,800 --> 00:00:52,700 +Even 's! + +2 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +3 +00:00:56,000 --> 00:00:57,000 +

It should just strip all of +them mercilessly

+ +4 +00:00:58,100 --> 00:00:59,600 +
    Including this one!
diff --git a/tests/samples/sample1-cut-out-end.srt b/tests/samples/sample1-cut-out-end.srt new file mode 100644 index 0000000..d60785a --- /dev/null +++ b/tests/samples/sample1-cut-out-end.srt @@ -0,0 +1,4 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere diff --git a/tests/samples/sample1-sample3-cut-out-end-joined.srt b/tests/samples/sample1-sample3-cut-out-end-joined.srt new file mode 100644 index 0000000..1f028cb --- /dev/null +++ b/tests/samples/sample1-sample3-cut-out-end-joined.srt @@ -0,0 +1,8 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +10:03:49,800 --> 10:05:02,000 +This one has even more whitespace! diff --git a/tests/samples/sample1-shifted-minus-1h-begin.srt b/tests/samples/sample1-shifted-minus-1h-begin.srt new file mode 100644 index 0000000..2b09ba4 --- /dev/null +++ b/tests/samples/sample1-shifted-minus-1h-begin.srt @@ -0,0 +1,8 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:51,800 --> 00:00:52,700 +Even 's! diff --git a/tests/samples/sample1-stripped-end.srt b/tests/samples/sample1-stripped-end.srt new file mode 100644 index 0000000..b68798d --- /dev/null +++ b/tests/samples/sample1-stripped-end.srt @@ -0,0 +1,23 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:51,800 --> 00:00:52,700 +Even 's! + +3 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +4 +00:00:56,000 --> 00:00:57,000 +

It should just strip all of +them mercilessly

+ +5 +00:00:58,100 --> 00:00:59,600 +
    Including this one!