Implement new flags

This commit is contained in:
Augusto Gunsch 2021-11-17 20:45:38 -03:00
parent e51110847e
commit d9c25e1b27
No known key found for this signature in database
GPG Key ID: F7EEFE29825C72DC
11 changed files with 282 additions and 75 deletions

View File

@ -9,8 +9,6 @@ pip install fsub
# Usage # Usage
``` ```
usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...]
Fix, edit and clean SubRip (.srt) files. Fix, edit and clean SubRip (.srt) files.
positional arguments: positional arguments:
@ -18,14 +16,30 @@ positional arguments:
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-f F, --config F use F as the config file (by default, on Unix it is:
"$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc")
-r, --replace edit files in-place (--join will delete joined files too), instead of the
default behavior of outputting results into files prefixed with "out-"
-p, --stdout dump results to stdout, and do not edit nor write any file
processing:
Flags that specify an action to be taken. Many may be specified.
-c, --clean remove subtitles matching regular expressions listed in the config file -c, --clean remove subtitles matching regular expressions listed in the config file
(this is the default behavior if no other flag is passed) (this is the default behavior if no other flag is passed)
-s MS, --shift MS shift all subtitles by MS milliseconds, which may be positive or negative -s MS, --shift MS shift all subtitles by MS milliseconds, which may be positive or negative
-n, --no-html strip HTML tags from subtitles content -n, --no-html strip HTML tags from subtitles content
-f F, --config F use F as the config file (by default, F is: on Unix:
$HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc)
-j, --join join all files into the first, shifting their time accordingly -j, --join join all files into the first, shifting their time accordingly
-r, --replace edit files in-place (-j will delete joined files too) -u, --cut-out cut the specified section from the file(s) into new files
sectioning:
Flags that specify a section to work in. They accept either a subtitle number or a time
stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours,
minutes, seconds are 2-zero padded while milliseconds is 3-zero padded). fsub will not
modify subtitles outside this range, except while joining the files.
-b B, --begin B specify section beginning (inclusive)
-e E, --end E specify section end (inclusive)
``` ```
# Testing # Testing

View File

@ -1,6 +1,6 @@
[metadata] [metadata]
name = fsub name = fsub
version = 0.1.3 version = 1.0.0
author = Augusto Lenz Gunsch author = Augusto Lenz Gunsch
author_email = augustogunsch@tutanota.com author_email = augustogunsch@tutanota.com
description = CLI SubRip editor description = CLI SubRip editor

View File

@ -96,6 +96,30 @@ class TimeStamp:
(self.hours, self.minutes, self.seconds, self.millisecods) (self.hours, self.minutes, self.seconds, self.millisecods)
class SectionMarker:
    """Boundary of a section, given as a time stamp or a subtitle number.

    The raw argument is first parsed as a ``TimeStamp``; if that fails it
    is parsed as an integer subtitle number. The parsed value is stored in
    ``self.marker``.
    """

    def __init__(self, arg):
        """Parse ``arg`` into ``self.marker``.

        Calls ``panic('Invalid section marker argument', 1)`` when ``arg``
        is neither accepted by ``TimeStamp`` nor convertible with ``int``.
        """
        try:
            self.marker = TimeStamp(arg)
        except Exception:
            # NOTE(review): the exception TimeStamp raises on malformed
            # input is not visible from here, so the broad catch is kept.
            try:
                self.marker = int(arg)
            except (TypeError, ValueError):
                panic('Invalid section marker argument', 1)

    def include_after(self, other):
        """Return whether subtitle ``other`` is at or after this marker.

        A time-stamp marker is compared against ``other.time_start``; a
        numeric marker is compared against ``other.number``.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_start >= self.marker
        return other.number >= self.marker

    def include_before(self, other):
        """Return whether subtitle ``other`` is at or before this marker.

        A time-stamp marker is compared against ``other.time_end``; a
        numeric marker is compared against ``other.number``.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_end <= self.marker
        return other.number <= self.marker

    def __le__(self, other):
        """Compare two markers of the same kind by their wrapped value.

        Returns ``NotImplemented`` when ``other`` is not a
        ``SectionMarker`` or when one marker is a time stamp and the other
        a subtitle number, because the two are not on a common scale.
        """
        if not isinstance(other, SectionMarker):
            return NotImplemented
        if type(self.marker) is not type(other.marker):
            return NotImplemented
        # The previous implementation called int() on the SectionMarker
        # objects themselves, which define no __int__ and so always raised.
        return self.marker <= other.marker
class Subtitle: class Subtitle:
# Parse a single subtitle # Parse a single subtitle
def __init__(self, lines, file_name, line_number): def __init__(self, lines, file_name, line_number):
@ -235,18 +259,10 @@ class SubripFile:
self.subs += other.subs self.subs += other.subs
return self return self
def clean(self, expressions):
if len(expressions) == 0:
return
# Remove lines matching any expression
for regexp in expressions:
self.subs = [sub for sub in self.subs if not sub.matches(regexp)]
def shift(self, ms): def shift(self, ms):
for sub in self.subs: for sub in self.subs:
sub.shift(ms) sub.shift(ms)
self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs)) self.subs = [sub for sub in self.subs if sub.time_start >= 0]
def strip_html(self): def strip_html(self):
p = re.compile('<.+?>') p = re.compile('<.+?>')
@ -260,26 +276,54 @@ class SubripFile:
i += 1 i += 1
def process(self, args, config): def process(self, args, config):
if args.clean: html_regex = re.compile('<.+?>')
self.clean(config.expressions) new_subs = []
for sub in self.subs:
if args.begin and not args.begin.include_after(sub):
new_subs.append(sub)
continue
if args.end and not args.end.include_before(sub):
new_subs.append(sub)
continue
if args.clean and len(config.expressions) > 0:
if any(sub.matches(regex) for regex in config.expressions):
continue
if args.shift: if args.shift:
self.shift(args.shift) sub.shift(args.shift)
if sub.time_start < 0:
continue
if args.no_html: if args.no_html:
self.strip_html() sub.replace(html_regex, '')
new_subs.append(sub)
self.subs = new_subs
self.renumber()
self.write_file(args.replace) self.write_file(args.replace)
def write_file(self, in_place=False, stdout=False): def write_file(self, in_place=False, stdout=False):
self.renumber()
if stdout: if stdout:
print(self) print(self)
return return
file = self.file_name if in_place else 'out-' + self.file_name try:
if in_place:
path = self.file_name
output = open(path, 'w', encoding='utf-8')
else:
path = Path(self.file_name)
path = path.with_name('out-' + path.name)
output = path.open(mode='w', encoding='utf-8')
except PermissionError:
panic('Can\'t access file {}: Permission denied'
.format(path), 1)
output = open(file, 'w', encoding='utf-8')
output.write(repr(self)) output.write(repr(self))
if len(self.subs) > 0: if len(self.subs) > 0:
@ -291,6 +335,12 @@ class SubripFile:
os.remove(self.file_name) os.remove(self.file_name)
del self del self
def trunc_before(self, marker):
    """Keep only the subtitles that ``marker.include_after`` accepts."""
    kept = []
    for entry in self.subs:
        if marker.include_after(entry):
            kept.append(entry)
    self.subs = kept
def trunc_after(self, marker):
    """Keep only the subtitles that ``marker.include_before`` accepts."""
    kept = []
    for entry in self.subs:
        if marker.include_before(entry):
            kept.append(entry)
    self.subs = kept
def __repr__(self): def __repr__(self):
return '\n\n'.join(map(repr, self.subs)) return '\n\n'.join(map(repr, self.subs))
@ -302,29 +352,6 @@ def parse_args(args):
add_help=True add_help=True
) )
parser.add_argument(
'-c', '--clean',
help='remove subtitles matching regular expressions ' +
'listed in the config file (this is the default ' +
'behavior if no other flag is passed)',
action='store_true'
)
parser.add_argument(
'-s', '--shift',
help='shift all subtitles by MS milliseconds, which ' +
'may be positive or negative',
metavar='MS',
action='store',
type=int
)
parser.add_argument(
'-n', '--no-html',
help='strip HTML tags from subtitles content',
action='store_true'
)
# Requires --clean # Requires --clean
parser.add_argument( parser.add_argument(
'-f', '--config', '-f', '--config',
@ -335,18 +362,45 @@ def parse_args(args):
type=argparse.FileType('r') type=argparse.FileType('r')
) )
parser.add_argument( processing = parser.add_argument_group(
'processing',
'Flags that specify an action to be taken. Many may ' +
'be specified.'
)
processing.add_argument(
'-c', '--clean',
help='remove subtitles matching regular expressions ' +
'listed in the config file (this is the default ' +
'behavior if no other flag is passed)',
action='store_true'
)
processing.add_argument(
'-s', '--shift',
help='shift all subtitles by MS milliseconds, which ' +
'may be positive or negative',
metavar='MS',
action='store',
type=int
)
processing.add_argument(
'-n', '--no-html',
help='strip HTML tags from subtitles content',
action='store_true'
)
processing.add_argument(
'-j', '--join', '-j', '--join',
help='join all files into the first, shifting their time accordingly', help='join all files into the first, shifting their time accordingly',
action='store_true' action='store_true'
) )
# Requires --begin # Requires --begin or --end, may have both
parser.add_argument( processing.add_argument(
'-u', '--cut-out', '-u', '--cut-out',
help='cut out the specified section from the file(s), creating ' + help='cut the specified section from the file(s) into new files',
'for every input file a new one prefixed with "cut-" ' +
'(--join will join both the input files and the cutouts)',
action='store_true' action='store_true'
) )
@ -385,29 +439,39 @@ def parse_args(args):
section.add_argument( section.add_argument(
'-b', '--begin', '-b', '--begin',
help='specify section beginning (by default, beginning of file)', help='specify section beginning (inclusive)',
metavar='B', metavar='B',
action='store' action='store'
) )
section.add_argument( section.add_argument(
'-e', '--end', '-e', '--end',
help='specify section end (by default, end of file)', help='specify section end (inclusive)',
metavar='E', metavar='E',
action='store' action='store'
) )
args = parser.parse_args(args) args = parser.parse_args(args)
# Flags that require section
if args.cut_out:
if not args.begin and not args.end:
panic('You must specify a section to work with', 1)
# Make sure --clean is the default # Make sure --clean is the default
# TODO: account for new options if not any((args.shift, args.no_html, args.join, args.cut_out)):
if not args.shift and not args.no_html:
args.clean = True args.clean = True
# Validate options # Validate options
if not args.clean and args.config: if not args.clean and args.config:
panic('-f requires -c', 1) panic('-f requires -c', 1)
if args.begin:
args.begin = SectionMarker(args.begin)
if args.end:
args.end = SectionMarker(args.end)
return args return args
@ -419,6 +483,15 @@ def run(args):
for file in args.files: for file in args.files:
parsed_files.append(SubripFile(file)) parsed_files.append(SubripFile(file))
if args.cut_out:
if args.begin:
for file in parsed_files:
file.trunc_before(args.begin)
if args.end:
for file in parsed_files:
file.trunc_after(args.end)
if args.join: if args.join:
first = parsed_files.pop(0) first = parsed_files.pop(0)
while True: while True:
@ -429,7 +502,6 @@ def run(args):
except IndexError: except IndexError:
break break
parsed_files.append(first) parsed_files.append(first)
first.renumber()
for file in parsed_files: for file in parsed_files:
file.process(args, config) file.process(args, config)

View File

@ -9,39 +9,39 @@ from pathlib import Path
class TestFsub(unittest.TestCase): class TestFsub(unittest.TestCase):
samples = Path('tests/samples') samples = Path('tests/samples')
def run_on(self, args, samples, ofiles, replace=False): def run_on(self, args, samples, expect_out_files, replace=False):
caller = inspect.stack()[1][3] caller = inspect.stack()[1][3]
ifiles = [] cloned_samples = []
samples = map(lambda s: str(self.samples / s) + '.srt', samples) samples = map(lambda s: str(self.samples / s) + '.srt', samples)
i = 1 i = 1
for sample in samples: for sample in samples:
ifile = str(i) + '.' + caller + '.srt' cloned_sample = str(i) + '.' + caller + '.srt'
shutil.copy(sample, ifile) shutil.copy(sample, cloned_sample)
args.append(ifile) args.append(cloned_sample)
ifiles.append(ifile) cloned_samples.append(cloned_sample)
i += 1 i += 1
fsub.run(args) fsub.run(args)
limit = len(ofiles) limit = len(expect_out_files)
for i, ifile in enumerate(ifiles): for i, cloned_sample in enumerate(cloned_samples):
if i < limit: if i < limit:
if not replace: if not replace:
os.remove(ifile) os.remove(cloned_sample)
ifile = 'out-' + ifile cloned_sample = 'out-' + cloned_sample
out = open(ifile) out = open(cloned_sample)
result = out.read() result = out.read()
out.close() out.close()
ofile = str(self.samples / ofiles[i]) + '.srt' expect_out_file = str(self.samples/expect_out_files[i])+'.srt'
cmp_file = open(ofile) expect_out_file = open(expect_out_file)
cmp = cmp_file.read() expect_out = expect_out_file.read()
cmp_file.close() expect_out_file.close()
self.assertEqual(result, cmp) self.assertEqual(result, expect_out)
try: try:
os.remove(ifile) os.remove(cloned_sample)
except FileNotFoundError: except FileNotFoundError:
pass pass
@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase):
args = ['-f', str(self.samples / 'blacklist')] args = ['-f', str(self.samples / 'blacklist')]
self.run_on(args, ['sample1'], ['sample1-cleaned']) self.run_on(args, ['sample1'], ['sample1-cleaned'])
def test_cleaned_begin(self):
args = ['-f', str(self.samples / 'blacklist'), '-b', '3']
self.run_on(args, ['sample1'], ['sample1-cleaned-begin'])
def test_stripped(self): def test_stripped(self):
self.run_on(['-n'], ['sample1'], ['sample1-stripped']) self.run_on(['-n'], ['sample1'], ['sample1-stripped'])
def test_stripped_end(self):
args = ['-n', '-e', '00:00:55,500']
self.run_on(args, ['sample1'], ['sample1-stripped-end'])
def test_cleaned_stripped(self): def test_cleaned_stripped(self):
args = ['-c', '-f', str(self.samples / 'blacklist'), '-n'] args = ['-c', '-f', str(self.samples / 'blacklist'), '-n']
self.run_on(args, ['sample1'], ['sample1-cleaned-stripped']) self.run_on(args, ['sample1'], ['sample1-cleaned-stripped'])
@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase):
args = ['-s', '-52000'] args = ['-s', '-52000']
self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s']) self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s'])
def test_shifted_minus_1h_begin(self):
args = ['-s', '-3600000', '-b', '00:00:53,500']
self.run_on(args, ['sample1'], ['sample1-shifted-minus-1h-begin'])
def test_joined(self): def test_joined(self):
args = ['-j'] args = ['-j']
self.run_on(args, ['sample1', 'sample2', 'sample3'], self.run_on(args, ['sample1', 'sample2', 'sample3'],
['sample1-sample2-sample3-joined']) ['sample1-sample2-sample3-joined'])
def test_cut_begin(self):
args = ['-b', '2', '-u']
self.run_on(args, ['sample1'], ['sample1-cut-out-begin'])
def test_cut_end(self):
args = ['-e', '1', '-u']
self.run_on(args, ['sample1'], ['sample1-cut-out-end'])
def test_cut_begin_end(self):
args = ['-b', '2', '-e', '4', '-u']
self.run_on(args, ['sample1'], ['sample1-cut-out-begin-end'])
def test_cut_end_joined(self):
args = ['-e', '1', '-u', '-j']
self.run_on(args, ['sample1', 'sample3'],
['sample1-sample3-cut-out-end-joined'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -0,0 +1,17 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>
2
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>
3
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>
4
00:00:58,100 --> 00:00:59,600
<ul>Including this one!</ul>

View File

@ -0,0 +1,14 @@
1
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>
2
00:00:53,500 --> 00:00:55,200
<html>The script should not
care whether the tag is
valid or not</html>
3
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>

View File

@ -0,0 +1,18 @@
1
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>
2
00:00:53,500 --> 00:00:55,200
<html>The script should not
care whether the tag is
valid or not</html>
3
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>
4
00:00:58,100 --> 00:00:59,600
<ul>Including this one!</ul>

View File

@ -0,0 +1,4 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>

View File

@ -0,0 +1,8 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>
2
10:03:49,800 --> 10:05:02,000
This one has even more whitespace!

View File

@ -0,0 +1,8 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>
2
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>

View File

@ -0,0 +1,23 @@
1
00:00:48,900 --> 00:00:49,800
This one is full of HTML tags.
Above, below, everywhere
2
00:00:51,800 --> 00:00:52,700
Even 's!
3
00:00:53,500 --> 00:00:55,200
The script should not
care whether the tag is
valid or not
4
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>
5
00:00:58,100 --> 00:00:59,600
<ul>Including this one!</ul>