Implement new flags

This commit is contained in:
Augusto Gunsch 2021-11-17 20:45:38 -03:00
parent e51110847e
commit d9c25e1b27
No known key found for this signature in database
GPG Key ID: F7EEFE29825C72DC
11 changed files with 282 additions and 75 deletions

View File

@ -9,8 +9,6 @@ pip install fsub
# Usage
```
usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...]
Fix, edit and clean SubRip (.srt) files.
positional arguments:
@ -18,14 +16,30 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-f F, --config F use F as the config file (by default, on Unix it is:
"$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc")
-r, --replace edit files in-place (--join will delete joined files too), instead of the
default behavior of outputting results into files prefixed with "out-"
-p, --stdout dump results to stdout, and do not edit nor write any file
processing:
Flags that specify an action to be taken. Many may be specified.
-c, --clean remove subtitles matching regular expressions listed in the config file
(this is the default behavior if no other flag is passed)
-s MS, --shift MS shift all subtitles by MS milliseconds, which may be positive or negative
-n, --no-html strip HTML tags from subtitles content
-f F, --config F use F as the config file (by default, F is: on Unix:
$HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc)
-j, --join join all files into the first, shifting their time accordingly
-r, --replace edit files in-place (-j will delete joined files too)
-u, --cut-out cut the specified section from the file(s) into new files
sectioning:
Flags that specify a section to work in. They accept either a subtitle number or a time
stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours,
minutes, seconds are 2-zero padded while milliseconds is 3-zero padded). fsub will not
modify subtitles outside this range, except while joining the files.
-b B, --begin B specify section beginning (inclusive)
-e E, --end E specify section end (inclusive)
```
# Testing

View File

@ -1,6 +1,6 @@
[metadata]
name = fsub
version = 0.1.3
version = 1.0.0
author = Augusto Lenz Gunsch
author_email = augustogunsch@tutanota.com
description = CLI SubRip editor

View File

@ -96,6 +96,30 @@ class TimeStamp:
(self.hours, self.minutes, self.seconds, self.millisecods)
class SectionMarker:
    """Boundary of a section, given as a time stamp or a subtitle number.

    The raw argument is first parsed as a ``TimeStamp``; when that fails it
    is parsed as an integer subtitle number. The parsed value is stored in
    ``self.marker``.
    """

    def __init__(self, arg):
        """Parse ``arg`` into ``self.marker``.

        Calls ``panic('Invalid section marker argument', 1)`` when ``arg``
        is neither a valid time stamp nor an integer.
        """
        try:
            self.marker = TimeStamp(arg)
        except Exception:
            # The errors TimeStamp raises on bad input are not visible from
            # here, so any failure is treated as "not a time stamp".
            try:
                self.marker = int(arg)
            except (TypeError, ValueError):
                panic('Invalid section marker argument', 1)

    def include_after(self, other):
        """Return whether subtitle ``other`` lies at or after this marker.

        A time stamp marker is compared against ``other.time_start``; a
        numeric marker is compared against ``other.number``.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_start >= self.marker

        return other.number >= self.marker

    def include_before(self, other):
        """Return whether subtitle ``other`` lies at or before this marker.

        A time stamp marker is compared against ``other.time_end``; a
        numeric marker is compared against ``other.number``.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_end <= self.marker

        return other.number <= self.marker

    def __int__(self):
        """Return the marker as an integer.

        Needed by ``__le__``, which converts both operands with ``int()``.
        NOTE(review): assumes ``TimeStamp`` itself supports ``int()`` --
        confirm against the TimeStamp class.
        """
        return int(self.marker)

    def __le__(self, other):
        """Compare the integer values of this marker and ``other``."""
        return int(self) <= int(other)
class Subtitle:
# Parse a single subtitle
def __init__(self, lines, file_name, line_number):
@ -235,18 +259,10 @@ class SubripFile:
self.subs += other.subs
return self
def clean(self, expressions):
if len(expressions) == 0:
return
# Remove lines matching any expression
for regexp in expressions:
self.subs = [sub for sub in self.subs if not sub.matches(regexp)]
def shift(self, ms):
for sub in self.subs:
sub.shift(ms)
self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
self.subs = [sub for sub in self.subs if sub.time_start >= 0]
def strip_html(self):
p = re.compile('<.+?>')
@ -260,26 +276,54 @@ class SubripFile:
i += 1
def process(self, args, config):
if args.clean:
self.clean(config.expressions)
html_regex = re.compile('<.+?>')
new_subs = []
for sub in self.subs:
if args.begin and not args.begin.include_after(sub):
new_subs.append(sub)
continue
if args.end and not args.end.include_before(sub):
new_subs.append(sub)
continue
if args.clean and len(config.expressions) > 0:
if any(sub.matches(regex) for regex in config.expressions):
continue
if args.shift:
self.shift(args.shift)
sub.shift(args.shift)
if sub.time_start < 0:
continue
if args.no_html:
self.strip_html()
sub.replace(html_regex, '')
new_subs.append(sub)
self.subs = new_subs
self.renumber()
self.write_file(args.replace)
def write_file(self, in_place=False, stdout=False):
self.renumber()
if stdout:
print(self)
return
file = self.file_name if in_place else 'out-' + self.file_name
try:
if in_place:
path = self.file_name
output = open(path, 'w', encoding='utf-8')
else:
path = Path(self.file_name)
path = path.with_name('out-' + path.name)
output = path.open(mode='w', encoding='utf-8')
except PermissionError:
panic('Can\'t access file {}: Permission denied'
.format(path), 1)
output = open(file, 'w', encoding='utf-8')
output.write(repr(self))
if len(self.subs) > 0:
@ -291,6 +335,12 @@ class SubripFile:
os.remove(self.file_name)
del self
def trunc_before(self, marker):
    """Keep only the subtitles that ``marker.include_after`` accepts."""
    kept = filter(marker.include_after, self.subs)
    self.subs = list(kept)
def trunc_after(self, marker):
    """Keep only the subtitles that ``marker.include_before`` accepts."""
    kept = filter(marker.include_before, self.subs)
    self.subs = list(kept)
def __repr__(self):
return '\n\n'.join(map(repr, self.subs))
@ -302,29 +352,6 @@ def parse_args(args):
add_help=True
)
parser.add_argument(
'-c', '--clean',
help='remove subtitles matching regular expressions ' +
'listed in the config file (this is the default ' +
'behavior if no other flag is passed)',
action='store_true'
)
parser.add_argument(
'-s', '--shift',
help='shift all subtitles by MS milliseconds, which ' +
'may be positive or negative',
metavar='MS',
action='store',
type=int
)
parser.add_argument(
'-n', '--no-html',
help='strip HTML tags from subtitles content',
action='store_true'
)
# Requires --clean
parser.add_argument(
'-f', '--config',
@ -335,18 +362,45 @@ def parse_args(args):
type=argparse.FileType('r')
)
parser.add_argument(
processing = parser.add_argument_group(
'processing',
'Flags that specify an action to be taken. Many may ' +
'be specified.'
)
processing.add_argument(
'-c', '--clean',
help='remove subtitles matching regular expressions ' +
'listed in the config file (this is the default ' +
'behavior if no other flag is passed)',
action='store_true'
)
processing.add_argument(
'-s', '--shift',
help='shift all subtitles by MS milliseconds, which ' +
'may be positive or negative',
metavar='MS',
action='store',
type=int
)
processing.add_argument(
'-n', '--no-html',
help='strip HTML tags from subtitles content',
action='store_true'
)
processing.add_argument(
'-j', '--join',
help='join all files into the first, shifting their time accordingly',
action='store_true'
)
# Requires --begin
parser.add_argument(
# Requires --begin or --end, may have both
processing.add_argument(
'-u', '--cut-out',
help='cut out the specified section from the file(s), creating ' +
'for every input file a new one prefixed with "cut-" ' +
'(--join will join both the input files and the cutouts)',
help='cut the specified section from the file(s) into new files',
action='store_true'
)
@ -385,29 +439,39 @@ def parse_args(args):
section.add_argument(
'-b', '--begin',
help='specify section beginning (by default, beginning of file)',
help='specify section beginning (inclusive)',
metavar='B',
action='store'
)
section.add_argument(
'-e', '--end',
help='specify section end (by default, end of file)',
help='specify section end (inclusive)',
metavar='E',
action='store'
)
args = parser.parse_args(args)
# Flags that require section
if args.cut_out:
if not args.begin and not args.end:
panic('You must specify a section to work with', 1)
# Make sure --clean is the default
# TODO: account for new options
if not args.shift and not args.no_html:
if not any((args.shift, args.no_html, args.join, args.cut_out)):
args.clean = True
# Validate options
if not args.clean and args.config:
panic('-f requires -c', 1)
if args.begin:
args.begin = SectionMarker(args.begin)
if args.end:
args.end = SectionMarker(args.end)
return args
@ -419,6 +483,15 @@ def run(args):
for file in args.files:
parsed_files.append(SubripFile(file))
if args.cut_out:
if args.begin:
for file in parsed_files:
file.trunc_before(args.begin)
if args.end:
for file in parsed_files:
file.trunc_after(args.end)
if args.join:
first = parsed_files.pop(0)
while True:
@ -429,7 +502,6 @@ def run(args):
except IndexError:
break
parsed_files.append(first)
first.renumber()
for file in parsed_files:
file.process(args, config)

View File

@ -9,39 +9,39 @@ from pathlib import Path
class TestFsub(unittest.TestCase):
samples = Path('tests/samples')
def run_on(self, args, samples, ofiles, replace=False):
def run_on(self, args, samples, expect_out_files, replace=False):
caller = inspect.stack()[1][3]
ifiles = []
cloned_samples = []
samples = map(lambda s: str(self.samples / s) + '.srt', samples)
i = 1
for sample in samples:
ifile = str(i) + '.' + caller + '.srt'
shutil.copy(sample, ifile)
args.append(ifile)
ifiles.append(ifile)
cloned_sample = str(i) + '.' + caller + '.srt'
shutil.copy(sample, cloned_sample)
args.append(cloned_sample)
cloned_samples.append(cloned_sample)
i += 1
fsub.run(args)
limit = len(ofiles)
for i, ifile in enumerate(ifiles):
limit = len(expect_out_files)
for i, cloned_sample in enumerate(cloned_samples):
if i < limit:
if not replace:
os.remove(ifile)
ifile = 'out-' + ifile
out = open(ifile)
os.remove(cloned_sample)
cloned_sample = 'out-' + cloned_sample
out = open(cloned_sample)
result = out.read()
out.close()
ofile = str(self.samples / ofiles[i]) + '.srt'
cmp_file = open(ofile)
cmp = cmp_file.read()
cmp_file.close()
expect_out_file = str(self.samples/expect_out_files[i])+'.srt'
expect_out_file = open(expect_out_file)
expect_out = expect_out_file.read()
expect_out_file.close()
self.assertEqual(result, cmp)
self.assertEqual(result, expect_out)
try:
os.remove(ifile)
os.remove(cloned_sample)
except FileNotFoundError:
pass
@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase):
args = ['-f', str(self.samples / 'blacklist')]
self.run_on(args, ['sample1'], ['sample1-cleaned'])
def test_cleaned_begin(self):
args = ['-f', str(self.samples / 'blacklist'), '-b', '3']
self.run_on(args, ['sample1'], ['sample1-cleaned-begin'])
def test_stripped(self):
self.run_on(['-n'], ['sample1'], ['sample1-stripped'])
def test_stripped_end(self):
args = ['-n', '-e', '00:00:55,500']
self.run_on(args, ['sample1'], ['sample1-stripped-end'])
def test_cleaned_stripped(self):
args = ['-c', '-f', str(self.samples / 'blacklist'), '-n']
self.run_on(args, ['sample1'], ['sample1-cleaned-stripped'])
@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase):
args = ['-s', '-52000']
self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s'])
def test_shifted_minus_1h_begin(self):
args = ['-s', '-3600000', '-b', '00:00:53,500']
self.run_on(args, ['sample1'], ['sample1-shifted-minus-1h-begin'])
def test_joined(self):
args = ['-j']
self.run_on(args, ['sample1', 'sample2', 'sample3'],
['sample1-sample2-sample3-joined'])
def test_cut_begin(self):
args = ['-b', '2', '-u']
self.run_on(args, ['sample1'], ['sample1-cut-out-begin'])
def test_cut_end(self):
args = ['-e', '1', '-u']
self.run_on(args, ['sample1'], ['sample1-cut-out-end'])
def test_cut_begin_end(self):
args = ['-b', '2', '-e', '4', '-u']
self.run_on(args, ['sample1'], ['sample1-cut-out-begin-end'])
def test_cut_end_joined(self):
args = ['-e', '1', '-u', '-j']
self.run_on(args, ['sample1', 'sample3'],
['sample1-sample3-cut-out-end-joined'])
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,17 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>
2
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>
3
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>
4
00:00:58,100 --> 00:00:59,600
<ul>Including this one!</ul>

View File

@ -0,0 +1,14 @@
1
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>
2
00:00:53,500 --> 00:00:55,200
<html>The script should not
care whether the tag is
valid or not</html>
3
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>

View File

@ -0,0 +1,18 @@
1
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>
2
00:00:53,500 --> 00:00:55,200
<html>The script should not
care whether the tag is
valid or not</html>
3
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>
4
00:00:58,100 --> 00:00:59,600
<ul>Including this one!</ul>

View File

@ -0,0 +1,4 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>

View File

@ -0,0 +1,8 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>
2
10:03:49,800 --> 10:05:02,000
This one has even more whitespace!

View File

@ -0,0 +1,8 @@
1
00:00:48,900 --> 00:00:49,800
<b>This one is full of HTML tags.</b>
<i>Above, below, everywhere</i>
2
00:00:51,800 --> 00:00:52,700
<a href='dummy'>Even <a>'s!</a>

View File

@ -0,0 +1,23 @@
1
00:00:48,900 --> 00:00:49,800
This one is full of HTML tags.
Above, below, everywhere
2
00:00:51,800 --> 00:00:52,700
Even 's!
3
00:00:53,500 --> 00:00:55,200
The script should not
care whether the tag is
valid or not
4
00:00:56,000 --> 00:00:57,000
<p>It should just strip all of
them mercilessly</p>
5
00:00:58,100 --> 00:00:59,600
<ul>Including this one!</ul>