Implement new flags
This commit is contained in:
parent
e51110847e
commit
d9c25e1b27
24
README.md
24
README.md
|
@ -9,8 +9,6 @@ pip install fsub
|
|||
|
||||
# Usage
|
||||
```
|
||||
usage: fsub [-h] [-c] [-s MS] [-n] [-f F] [-j] [-r] file [file ...]
|
||||
|
||||
Fix, edit and clean SubRip (.srt) files.
|
||||
|
||||
positional arguments:
|
||||
|
@ -18,14 +16,30 @@ positional arguments:
|
|||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-f F, --config F use F as the config file (by default, on Unix it is:
|
||||
"$HOME/.config/fsubrc"; on Windows it is: "%APPDATA%\fsubrc")
|
||||
-r, --replace edit files in-place (--join will delete joined files too), instead of the
|
||||
default behavior of outputting results into files prefixed with "out-"
|
||||
-p, --stdout dump results to stdout, and do not edit nor write any file
|
||||
|
||||
processing:
|
||||
Flags that specify an action to be taken. Many may be specified.
|
||||
|
||||
-c, --clean remove subtitles matching regular expressions listed in the config file
|
||||
(this is the default behavior if no other flag is passed)
|
||||
-s MS, --shift MS shift all subtitles by MS milliseconds, which may be positive or negative
|
||||
-n, --no-html strip HTML tags from subtitles content
|
||||
-f F, --config F use F as the config file (by default, F is: on Unix:
|
||||
$HOME/.config/fsubrc; on Windows: %APPDATA%\fsubrc)
|
||||
-j, --join join all files into the first, shifting their time accordingly
|
||||
-r, --replace edit files in-place (-j will delete joined files too)
|
||||
-u, --cut-out cut the specified section from the file(s) into new files
|
||||
|
||||
sectioning:
|
||||
Flags that specify a section to work in. They accept either a subtitle number or a time
|
||||
stamp in the SubRip format ("<hours>:<minutes>:<seconds>,<milliseconds>", where hours,
|
||||
minutes, seconds are 2-zero padded while milliseconds is 3-zero padded). fsub will not
|
||||
modify subtitles outside this range, except while joining the files.
|
||||
|
||||
-b B, --begin B specify section beginning (inclusive)
|
||||
-e E, --end E specify section end (inclusive)
|
||||
```
|
||||
|
||||
# Testing
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[metadata]
|
||||
name = fsub
|
||||
version = 0.1.3
|
||||
version = 1.0.0
|
||||
author = Augusto Lenz Gunsch
|
||||
author_email = augustogunsch@tutanota.com
|
||||
description = CLI SubRip editor
|
||||
|
|
176
src/fsub/fsub.py
176
src/fsub/fsub.py
|
@ -96,6 +96,30 @@ class TimeStamp:
|
|||
(self.hours, self.minutes, self.seconds, self.millisecods)
|
||||
|
||||
|
||||
class SectionMarker:
    """Boundary of a section, given as a time stamp or a subtitle number.

    The raw argument is first parsed as a ``TimeStamp``; if that fails it is
    parsed as an integer subtitle number. The parsed value is stored in
    ``self.marker``. If neither parse succeeds, ``panic`` is called with the
    message 'Invalid section marker argument' and code 1.
    """

    def __init__(self, arg):
        try:
            self.marker = TimeStamp(arg)
        except Exception:
            # TimeStamp's failure modes are not visible from here, so any
            # error is treated as "not a time stamp" and we fall back to a
            # plain subtitle number.
            try:
                self.marker = int(arg)
            except (TypeError, ValueError):
                panic('Invalid section marker argument', 1)

    def include_after(self, other):
        """Return whether subtitle ``other`` lies at or after this marker.

        A time-stamp marker is compared against ``other.time_start``; a
        numeric marker is compared against ``other.number``.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_start >= self.marker
        return other.number >= self.marker

    def include_before(self, other):
        """Return whether subtitle ``other`` lies at or before this marker.

        A time-stamp marker is compared against ``other.time_end``; a
        numeric marker is compared against ``other.number``.
        """
        if isinstance(self.marker, TimeStamp):
            return other.time_end <= self.marker
        return other.number <= self.marker

    def __int__(self):
        """Return the marker as an integer.

        Needed by ``__le__``, which converts both operands with ``int()``.
        """
        # NOTE(review): for time-stamp markers this assumes TimeStamp itself
        # supports int() -- confirm against the TimeStamp class.
        return int(self.marker)

    def __le__(self, other):
        """Compare two markers (or a marker and a number) by integer value."""
        return int(self) <= int(other)
|
||||
|
||||
|
||||
class Subtitle:
|
||||
# Parse a single subtitle
|
||||
def __init__(self, lines, file_name, line_number):
|
||||
|
@ -235,18 +259,10 @@ class SubripFile:
|
|||
self.subs += other.subs
|
||||
return self
|
||||
|
||||
def clean(self, expressions):
|
||||
if len(expressions) == 0:
|
||||
return
|
||||
|
||||
# Remove lines matching any expression
|
||||
for regexp in expressions:
|
||||
self.subs = [sub for sub in self.subs if not sub.matches(regexp)]
|
||||
|
||||
def shift(self, ms):
|
||||
for sub in self.subs:
|
||||
sub.shift(ms)
|
||||
self.subs = list(filter(lambda sub: sub.time_start >= 0, self.subs))
|
||||
self.subs = [sub for sub in self.subs if sub.time_start >= 0]
|
||||
|
||||
def strip_html(self):
|
||||
p = re.compile('<.+?>')
|
||||
|
@ -260,26 +276,54 @@ class SubripFile:
|
|||
i += 1
|
||||
|
||||
def process(self, args, config):
|
||||
if args.clean:
|
||||
self.clean(config.expressions)
|
||||
html_regex = re.compile('<.+?>')
|
||||
new_subs = []
|
||||
for sub in self.subs:
|
||||
if args.begin and not args.begin.include_after(sub):
|
||||
new_subs.append(sub)
|
||||
continue
|
||||
|
||||
if args.shift:
|
||||
self.shift(args.shift)
|
||||
if args.end and not args.end.include_before(sub):
|
||||
new_subs.append(sub)
|
||||
continue
|
||||
|
||||
if args.no_html:
|
||||
self.strip_html()
|
||||
if args.clean and len(config.expressions) > 0:
|
||||
if any(sub.matches(regex) for regex in config.expressions):
|
||||
continue
|
||||
|
||||
if args.shift:
|
||||
sub.shift(args.shift)
|
||||
if sub.time_start < 0:
|
||||
continue
|
||||
|
||||
if args.no_html:
|
||||
sub.replace(html_regex, '')
|
||||
|
||||
new_subs.append(sub)
|
||||
|
||||
self.subs = new_subs
|
||||
|
||||
self.renumber()
|
||||
self.write_file(args.replace)
|
||||
|
||||
def write_file(self, in_place=False, stdout=False):
|
||||
self.renumber()
|
||||
|
||||
if stdout:
|
||||
print(self)
|
||||
return
|
||||
|
||||
file = self.file_name if in_place else 'out-' + self.file_name
|
||||
try:
|
||||
if in_place:
|
||||
path = self.file_name
|
||||
output = open(path, 'w', encoding='utf-8')
|
||||
else:
|
||||
path = Path(self.file_name)
|
||||
path = path.with_name('out-' + path.name)
|
||||
output = path.open(mode='w', encoding='utf-8')
|
||||
except PermissionError:
|
||||
panic('Can\'t access file {}: Permission denied'
|
||||
.format(path), 1)
|
||||
|
||||
output = open(file, 'w', encoding='utf-8')
|
||||
output.write(repr(self))
|
||||
|
||||
if len(self.subs) > 0:
|
||||
|
@ -291,6 +335,12 @@ class SubripFile:
|
|||
os.remove(self.file_name)
|
||||
del self
|
||||
|
||||
def trunc_before(self, marker):
    """Keep only the subtitles for which ``marker.include_after`` is true."""
    kept = []
    for entry in self.subs:
        if marker.include_after(entry):
            kept.append(entry)
    self.subs = kept
|
||||
|
||||
def trunc_after(self, marker):
    """Keep only the subtitles for which ``marker.include_before`` is true."""
    kept = []
    for entry in self.subs:
        if marker.include_before(entry):
            kept.append(entry)
    self.subs = kept
|
||||
|
||||
def __repr__(self):
|
||||
return '\n\n'.join(map(repr, self.subs))
|
||||
|
||||
|
@ -302,29 +352,6 @@ def parse_args(args):
|
|||
add_help=True
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-c', '--clean',
|
||||
help='remove subtitles matching regular expressions ' +
|
||||
'listed in the config file (this is the default ' +
|
||||
'behavior if no other flag is passed)',
|
||||
action='store_true'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--shift',
|
||||
help='shift all subtitles by MS milliseconds, which ' +
|
||||
'may be positive or negative',
|
||||
metavar='MS',
|
||||
action='store',
|
||||
type=int
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-n', '--no-html',
|
||||
help='strip HTML tags from subtitles content',
|
||||
action='store_true'
|
||||
)
|
||||
|
||||
# Requires --clean
|
||||
parser.add_argument(
|
||||
'-f', '--config',
|
||||
|
@ -335,18 +362,45 @@ def parse_args(args):
|
|||
type=argparse.FileType('r')
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
processing = parser.add_argument_group(
|
||||
'processing',
|
||||
'Flags that specify an action to be taken. Many may ' +
|
||||
'be specified.'
|
||||
)
|
||||
|
||||
processing.add_argument(
|
||||
'-c', '--clean',
|
||||
help='remove subtitles matching regular expressions ' +
|
||||
'listed in the config file (this is the default ' +
|
||||
'behavior if no other flag is passed)',
|
||||
action='store_true'
|
||||
)
|
||||
|
||||
processing.add_argument(
|
||||
'-s', '--shift',
|
||||
help='shift all subtitles by MS milliseconds, which ' +
|
||||
'may be positive or negative',
|
||||
metavar='MS',
|
||||
action='store',
|
||||
type=int
|
||||
)
|
||||
|
||||
processing.add_argument(
|
||||
'-n', '--no-html',
|
||||
help='strip HTML tags from subtitles content',
|
||||
action='store_true'
|
||||
)
|
||||
|
||||
processing.add_argument(
|
||||
'-j', '--join',
|
||||
help='join all files into the first, shifting their time accordingly',
|
||||
action='store_true'
|
||||
)
|
||||
|
||||
# Requires --begin
|
||||
parser.add_argument(
|
||||
# Requires --begin or --end, may have both
|
||||
processing.add_argument(
|
||||
'-u', '--cut-out',
|
||||
help='cut out the specified section from the file(s), creating ' +
|
||||
'for every input file a new one prefixed with "cut-" ' +
|
||||
'(--join will join both the input files and the cutouts)',
|
||||
help='cut the specified section from the file(s) into new files',
|
||||
action='store_true'
|
||||
)
|
||||
|
||||
|
@ -385,29 +439,39 @@ def parse_args(args):
|
|||
|
||||
section.add_argument(
|
||||
'-b', '--begin',
|
||||
help='specify section beginning (by default, beginning of file)',
|
||||
help='specify section beginning (inclusive)',
|
||||
metavar='B',
|
||||
action='store'
|
||||
)
|
||||
|
||||
section.add_argument(
|
||||
'-e', '--end',
|
||||
help='specify section end (by default, end of file)',
|
||||
help='specify section end (inclusive)',
|
||||
metavar='E',
|
||||
action='store'
|
||||
)
|
||||
|
||||
args = parser.parse_args(args)
|
||||
|
||||
# Flags that require section
|
||||
if args.cut_out:
|
||||
if not args.begin and not args.end:
|
||||
panic('You must specify a section to work with', 1)
|
||||
|
||||
# Make sure --clean is the default
|
||||
# TODO: account for new options
|
||||
if not args.shift and not args.no_html:
|
||||
if not any((args.shift, args.no_html, args.join, args.cut_out)):
|
||||
args.clean = True
|
||||
|
||||
# Validate options
|
||||
if not args.clean and args.config:
|
||||
panic('-f requires -c', 1)
|
||||
|
||||
if args.begin:
|
||||
args.begin = SectionMarker(args.begin)
|
||||
|
||||
if args.end:
|
||||
args.end = SectionMarker(args.end)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
|
@ -419,6 +483,15 @@ def run(args):
|
|||
for file in args.files:
|
||||
parsed_files.append(SubripFile(file))
|
||||
|
||||
if args.cut_out:
|
||||
if args.begin:
|
||||
for file in parsed_files:
|
||||
file.trunc_before(args.begin)
|
||||
|
||||
if args.end:
|
||||
for file in parsed_files:
|
||||
file.trunc_after(args.end)
|
||||
|
||||
if args.join:
|
||||
first = parsed_files.pop(0)
|
||||
while True:
|
||||
|
@ -429,7 +502,6 @@ def run(args):
|
|||
except IndexError:
|
||||
break
|
||||
parsed_files.append(first)
|
||||
first.renumber()
|
||||
|
||||
for file in parsed_files:
|
||||
file.process(args, config)
|
||||
|
|
|
@ -9,39 +9,39 @@ from pathlib import Path
|
|||
class TestFsub(unittest.TestCase):
|
||||
samples = Path('tests/samples')
|
||||
|
||||
def run_on(self, args, samples, ofiles, replace=False):
|
||||
def run_on(self, args, samples, expect_out_files, replace=False):
|
||||
caller = inspect.stack()[1][3]
|
||||
ifiles = []
|
||||
cloned_samples = []
|
||||
|
||||
samples = map(lambda s: str(self.samples / s) + '.srt', samples)
|
||||
i = 1
|
||||
for sample in samples:
|
||||
ifile = str(i) + '.' + caller + '.srt'
|
||||
shutil.copy(sample, ifile)
|
||||
args.append(ifile)
|
||||
ifiles.append(ifile)
|
||||
cloned_sample = str(i) + '.' + caller + '.srt'
|
||||
shutil.copy(sample, cloned_sample)
|
||||
args.append(cloned_sample)
|
||||
cloned_samples.append(cloned_sample)
|
||||
i += 1
|
||||
|
||||
fsub.run(args)
|
||||
|
||||
limit = len(ofiles)
|
||||
for i, ifile in enumerate(ifiles):
|
||||
limit = len(expect_out_files)
|
||||
for i, cloned_sample in enumerate(cloned_samples):
|
||||
if i < limit:
|
||||
if not replace:
|
||||
os.remove(ifile)
|
||||
ifile = 'out-' + ifile
|
||||
out = open(ifile)
|
||||
os.remove(cloned_sample)
|
||||
cloned_sample = 'out-' + cloned_sample
|
||||
out = open(cloned_sample)
|
||||
result = out.read()
|
||||
out.close()
|
||||
|
||||
ofile = str(self.samples / ofiles[i]) + '.srt'
|
||||
cmp_file = open(ofile)
|
||||
cmp = cmp_file.read()
|
||||
cmp_file.close()
|
||||
expect_out_file = str(self.samples/expect_out_files[i])+'.srt'
|
||||
expect_out_file = open(expect_out_file)
|
||||
expect_out = expect_out_file.read()
|
||||
expect_out_file.close()
|
||||
|
||||
self.assertEqual(result, cmp)
|
||||
self.assertEqual(result, expect_out)
|
||||
try:
|
||||
os.remove(ifile)
|
||||
os.remove(cloned_sample)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
|
@ -49,9 +49,17 @@ class TestFsub(unittest.TestCase):
|
|||
args = ['-f', str(self.samples / 'blacklist')]
|
||||
self.run_on(args, ['sample1'], ['sample1-cleaned'])
|
||||
|
||||
def test_cleaned_begin(self):
|
||||
args = ['-f', str(self.samples / 'blacklist'), '-b', '3']
|
||||
self.run_on(args, ['sample1'], ['sample1-cleaned-begin'])
|
||||
|
||||
def test_stripped(self):
|
||||
self.run_on(['-n'], ['sample1'], ['sample1-stripped'])
|
||||
|
||||
def test_stripped_end(self):
|
||||
args = ['-n', '-e', '00:00:55,500']
|
||||
self.run_on(args, ['sample1'], ['sample1-stripped-end'])
|
||||
|
||||
def test_cleaned_stripped(self):
|
||||
args = ['-c', '-f', str(self.samples / 'blacklist'), '-n']
|
||||
self.run_on(args, ['sample1'], ['sample1-cleaned-stripped'])
|
||||
|
@ -71,11 +79,32 @@ class TestFsub(unittest.TestCase):
|
|||
args = ['-s', '-52000']
|
||||
self.run_on(args, ['sample1'], ['sample1-shifted-minus-52s'])
|
||||
|
||||
def test_shifted_minus_1h_begin(self):
|
||||
args = ['-s', '-3600000', '-b', '00:00:53,500']
|
||||
self.run_on(args, ['sample1'], ['sample1-shifted-minus-1h-begin'])
|
||||
|
||||
def test_joined(self):
|
||||
args = ['-j']
|
||||
self.run_on(args, ['sample1', 'sample2', 'sample3'],
|
||||
['sample1-sample2-sample3-joined'])
|
||||
|
||||
def test_cut_begin(self):
|
||||
args = ['-b', '2', '-u']
|
||||
self.run_on(args, ['sample1'], ['sample1-cut-out-begin'])
|
||||
|
||||
def test_cut_end(self):
|
||||
args = ['-e', '1', '-u']
|
||||
self.run_on(args, ['sample1'], ['sample1-cut-out-end'])
|
||||
|
||||
def test_cut_begin_end(self):
|
||||
args = ['-b', '2', '-e', '4', '-u']
|
||||
self.run_on(args, ['sample1'], ['sample1-cut-out-begin-end'])
|
||||
|
||||
def test_cut_end_joined(self):
|
||||
args = ['-e', '1', '-u', '-j']
|
||||
self.run_on(args, ['sample1', 'sample3'],
|
||||
['sample1-sample3-cut-out-end-joined'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
1
|
||||
00:00:48,900 --> 00:00:49,800
|
||||
<b>This one is full of HTML tags.</b>
|
||||
<i>Above, below, everywhere</i>
|
||||
|
||||
2
|
||||
00:00:51,800 --> 00:00:52,700
|
||||
<a href='dummy'>Even <a>'s!</a>
|
||||
|
||||
3
|
||||
00:00:56,000 --> 00:00:57,000
|
||||
<p>It should just strip all of
|
||||
them mercilessly</p>
|
||||
|
||||
4
|
||||
00:00:58,100 --> 00:00:59,600
|
||||
<ul>Including this one!</ul>
|
|
@ -0,0 +1,14 @@
|
|||
1
|
||||
00:00:51,800 --> 00:00:52,700
|
||||
<a href='dummy'>Even <a>'s!</a>
|
||||
|
||||
2
|
||||
00:00:53,500 --> 00:00:55,200
|
||||
<html>The script should not
|
||||
care whether the tag is
|
||||
valid or not</html>
|
||||
|
||||
3
|
||||
00:00:56,000 --> 00:00:57,000
|
||||
<p>It should just strip all of
|
||||
them mercilessly</p>
|
|
@ -0,0 +1,18 @@
|
|||
1
|
||||
00:00:51,800 --> 00:00:52,700
|
||||
<a href='dummy'>Even <a>'s!</a>
|
||||
|
||||
2
|
||||
00:00:53,500 --> 00:00:55,200
|
||||
<html>The script should not
|
||||
care whether the tag is
|
||||
valid or not</html>
|
||||
|
||||
3
|
||||
00:00:56,000 --> 00:00:57,000
|
||||
<p>It should just strip all of
|
||||
them mercilessly</p>
|
||||
|
||||
4
|
||||
00:00:58,100 --> 00:00:59,600
|
||||
<ul>Including this one!</ul>
|
|
@ -0,0 +1,4 @@
|
|||
1
|
||||
00:00:48,900 --> 00:00:49,800
|
||||
<b>This one is full of HTML tags.</b>
|
||||
<i>Above, below, everywhere</i>
|
|
@ -0,0 +1,8 @@
|
|||
1
|
||||
00:00:48,900 --> 00:00:49,800
|
||||
<b>This one is full of HTML tags.</b>
|
||||
<i>Above, below, everywhere</i>
|
||||
|
||||
2
|
||||
10:03:49,800 --> 10:05:02,000
|
||||
This one has even more whitespace!
|
|
@ -0,0 +1,8 @@
|
|||
1
|
||||
00:00:48,900 --> 00:00:49,800
|
||||
<b>This one is full of HTML tags.</b>
|
||||
<i>Above, below, everywhere</i>
|
||||
|
||||
2
|
||||
00:00:51,800 --> 00:00:52,700
|
||||
<a href='dummy'>Even <a>'s!</a>
|
|
@ -0,0 +1,23 @@
|
|||
1
|
||||
00:00:48,900 --> 00:00:49,800
|
||||
This one is full of HTML tags.
|
||||
Above, below, everywhere
|
||||
|
||||
2
|
||||
00:00:51,800 --> 00:00:52,700
|
||||
Even 's!
|
||||
|
||||
3
|
||||
00:00:53,500 --> 00:00:55,200
|
||||
The script should not
|
||||
care whether the tag is
|
||||
valid or not
|
||||
|
||||
4
|
||||
00:00:56,000 --> 00:00:57,000
|
||||
<p>It should just strip all of
|
||||
them mercilessly</p>
|
||||
|
||||
5
|
||||
00:00:58,100 --> 00:00:59,600
|
||||
<ul>Including this one!</ul>
|
Loading…
Reference in New Issue