diff --git a/.gitignore b/.gitignore index b4aea5a..b218332 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ dist/ *.egg-info/ __pycache__/ +test_*.srt diff --git a/src/fsub/__init__.py b/src/fsub/__init__.py index e69de29..6b60bcf 100644 --- a/src/fsub/__init__.py +++ b/src/fsub/__init__.py @@ -0,0 +1,6 @@ +import fsub + + +# External interface +def run(args): + fsub.run(args) diff --git a/src/fsub/fsub.py b/src/fsub/fsub.py index 1049b7f..2ab5799 100755 --- a/src/fsub/fsub.py +++ b/src/fsub/fsub.py @@ -37,37 +37,35 @@ def panic(message, code): class TimeStamp: def __init__(self, time_str): - parsed_time = time_str.split(':') - h = int(parsed_time[0]) - m = int(parsed_time[1]) - ms = int(parsed_time[2].replace(',', '')) - self.time = h * 3600000 + m * 60000 + ms + m = re.match(r'(\d{2,}):(\d{2}):(\d{2}),(\d{3})', time_str) + if not m: + raise Exception + + h, m, s, ms = map(int, m.groups()) + self.time = h * 3600000 + m * 60000 + s * 1000 + ms def getmilliseconds(self): return self.time % 1000 def getseconds(self): - return (self.time % 60000) / 1000 + return int((self.time % 60000) / 1000) def getminutes(self): - return (self.time / 60000) % 60 + return int((self.time / 60000) % 60) def gethours(self): - return self.time / 3600000 + return int(self.time / 3600000) millisecods = property(getmilliseconds) seconds = property(getseconds) minutes = property(getminutes) hours = property(gethours) + def __int__(self): + return self.time + def __iadd__(self, other): - t = type(other) - if t is int: - self.time += other - elif t is type(self): - self.time += other.time - else: - raise TypeError + self.time += int(other) return self def __neg__(self): @@ -79,19 +77,19 @@ class TimeStamp: return self.__iadd__(-other) def __lt__(self, other): - return self.time < other.time + return self.time < int(other) def __le__(self, other): - return self.time <= other.time + return self.time <= int(other) def __eq__(self, other): - return self.time == other.time + return self.time == int(other) def __gt__(self, other): - return self.time > other.time + return self.time > int(other) def __ge__(self, other): - return self.time >= other.time + return self.time >= int(other) def __repr__(self): return '%02d:%02d:%02d,%03d' % \ @@ -107,6 +105,7 @@ class Subtitle: try: # This is mostly ignored, as the subtitles are renumbered later self.number = int(lines.pop(0)) + assert self.number > 0 except Exception: panic('Invalid line number detected ({}:{})' .format(file_name, line_number), 1) @@ -140,12 +139,12 @@ class Subtitle: self.time_end += ms def replace(self, pattern, new_content): - for line in self.content: - line = pattern.replace(new_content, line) + self.content = \ + list(map(lambda line: pattern.sub(new_content, line), self.content)) def matches(self, regexp): for line in self.content: - if regexp.findall(line): + if regexp.search(line): return True return False @@ -272,14 +271,15 @@ class SubripFile: def write_file(self): output = open(self.file_name, 'w', encoding='utf-8') output.write(repr(self)) - output.write('\n') + if len(self.subs) > 0: + output.write('\n') output.close() def __repr__(self): return '\n\n'.join(map(repr, self.subs)) -def main(): +def parse_args(args): parser = argparse.ArgumentParser( prog='fsub', description='Fix, edit and clean SubRip (.srt) files.', @@ -326,7 +326,7 @@ def main(): nargs='+' ) - args = parser.parse_args() + args = parser.parse_args(args) # Make sure --clean is the default if not args.shift and not args.no_html: @@ -336,6 +336,11 @@ def main(): if not args.clean and args.config_file: panic('-f requires -c', 1) + return args + + +def run(args): + args = parse_args(args) config = ConfigFile(args) parsed_files = [] @@ -348,5 +353,9 @@ def main(): file.process(args, config) +def main(): + run(list(iter(sys.argv).next())) + + if __name__ == '__main__': main() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..f56aafe --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,6 @@ +from tests.unit import * +from tests.integration import * +import unittest + +if __name__ == '__main__': + unittest.main() diff --git a/tests/integration.py b/tests/integration.py new file mode 100644 index 0000000..0f870db --- /dev/null +++ b/tests/integration.py @@ -0,0 +1,62 @@ +import unittest +import src.fsub.fsub as fsub +import shutil +import os +import inspect +from pathlib import Path + + +class TestFsub(unittest.TestCase): + samples = Path('tests/samples') + maxDiff = None + + def run_on(self, args, sample, ofile): + ifile = inspect.stack()[1][3] + '.srt' + + sample = str(self.samples / sample) + '.srt' + shutil.copy(sample, ifile) + args.append(ifile) + + fsub.run(args) + + out = open(ifile) + result = out.read() + out.close() + + ofile = str(self.samples / ofile) + '.srt' + cmp_file = open(ofile) + cmp = cmp_file.read() + cmp_file.close() + + self.assertEqual(result, cmp) + os.remove(ifile) + + def test_cleaned(self): + args = ['-f', str(self.samples / 'blacklist')] + self.run_on(args, 'sample1', 'sample1-cleaned') + + def test_stripped(self): + self.run_on(['-n'], 'sample1', 'sample1-stripped') + + def test_cleaned_stripped(self): + args = ['-c', '-f', str(self.samples / 'blacklist'), '-n'] + self.run_on(args, 'sample1', 'sample1-cleaned-stripped') + + def test_cleaned_stripped_shifted_1h(self): + args = ['-c', + '-f', str(self.samples / 'blacklist'), + '-n', + '-s', '3600000'] + self.run_on(args, 'sample1', 'sample1-cleaned-stripped-shifted-1h') + + def test_shifted_minus_1h(self): + args = ['-s', '-3600000'] + self.run_on(args, 'sample1', 'sample1-shifted-minus-1h') + + def test_shifted_minus_52s(self): + args = ['-s', '-52000'] + self.run_on(args, 'sample1', 'sample1-shifted-minus-52s') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/samples/blacklist b/tests/samples/blacklist new file mode 100644 index 0000000..b317114 --- /dev/null +++ b/tests/samples/blacklist @@ -0,0 +1 @@ +Even diff --git a/tests/samples/sample1-cleaned-stripped-shifted-1h.srt b/tests/samples/sample1-cleaned-stripped-shifted-1h.srt new file mode 100644 index 0000000..e76e652 --- /dev/null +++ b/tests/samples/sample1-cleaned-stripped-shifted-1h.srt @@ -0,0 +1,19 @@ +1 +01:00:48,900 --> 01:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +01:00:53,500 --> 01:00:55,200 +The script should not +care whether the tag is +valid or not + +3 +01:00:56,000 --> 01:00:57,000 +It should just strip all of +them mercilessly + +4 +01:00:58,100 --> 01:00:59,600 +Including this one! diff --git a/tests/samples/sample1-cleaned-stripped.srt b/tests/samples/sample1-cleaned-stripped.srt new file mode 100644 index 0000000..3d658d6 --- /dev/null +++ b/tests/samples/sample1-cleaned-stripped.srt @@ -0,0 +1,19 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +3 +00:00:56,000 --> 00:00:57,000 +It should just strip all of +them mercilessly + +4 +00:00:58,100 --> 00:00:59,600 +Including this one! diff --git a/tests/samples/sample1-cleaned.srt b/tests/samples/sample1-cleaned.srt new file mode 100644 index 0000000..f3abc62 --- /dev/null +++ b/tests/samples/sample1-cleaned.srt @@ -0,0 +1,19 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +3 +00:00:56,000 --> 00:00:57,000 +

It should just strip all of +them mercilessly

+ +4 +00:00:58,100 --> 00:00:59,600 + diff --git a/tests/samples/sample1-shifted-minus-1h.srt b/tests/samples/sample1-shifted-minus-1h.srt new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/sample1-shifted-minus-52s.srt b/tests/samples/sample1-shifted-minus-52s.srt new file mode 100644 index 0000000..b8cd2b2 --- /dev/null +++ b/tests/samples/sample1-shifted-minus-52s.srt @@ -0,0 +1,14 @@ +1 +00:00:01,500 --> 00:00:03,200 +The script should not +care whether the tag is +valid or not + +2 +00:00:04,000 --> 00:00:05,000 +

It should just strip all of +them mercilessly

+ +3 +00:00:06,100 --> 00:00:07,600 + diff --git a/tests/samples/sample1-stripped.srt b/tests/samples/sample1-stripped.srt new file mode 100644 index 0000000..5d9385c --- /dev/null +++ b/tests/samples/sample1-stripped.srt @@ -0,0 +1,23 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:51,800 --> 00:00:52,700 +Even 's! + +3 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +4 +00:00:56,000 --> 00:00:57,000 +It should just strip all of +them mercilessly + +5 +00:00:58,100 --> 00:00:59,600 +Including this one! diff --git a/tests/samples/sample1.srt b/tests/samples/sample1.srt new file mode 100644 index 0000000..05156ed --- /dev/null +++ b/tests/samples/sample1.srt @@ -0,0 +1,23 @@ +1 +00:00:48,900 --> 00:00:49,800 +This one is full of HTML tags. +Above, below, everywhere + +2 +00:00:51,800 --> 00:00:52,700 +
Even 's! + +3 +00:00:53,500 --> 00:00:55,200 +The script should not +care whether the tag is +valid or not + +4 +00:00:56,000 --> 00:00:57,000 +

It should just strip all of +them mercilessly

+ +5 +00:00:58,100 --> 00:00:59,600 + diff --git a/tests/samples/sample2.srt b/tests/samples/sample2.srt new file mode 100644 index 0000000..85d42e8 --- /dev/null +++ b/tests/samples/sample2.srt @@ -0,0 +1,6 @@ + +1 +00:01:00,000 --> 00:01:01,000 +Just a dummy line, I'm sorry +But there's whitespace! + diff --git a/tests/unit.py b/tests/unit.py new file mode 100755 index 0000000..aa18f3d --- /dev/null +++ b/tests/unit.py @@ -0,0 +1,198 @@ +import unittest +import re +import io +import sys +import src.fsub.fsub as fsub + + +class TestTimeStamp(unittest.TestCase): + def test_parse(self): + # 3 h = 10800000 ms + # 46 min = 2760000 ms + # 13 s = 13000 ms + # 93 ms + # summed up: 13573093 ms + t = fsub.TimeStamp('03:46:13,093') + self.assertEqual(t.time, 13573093) + self.assertEqual(t.hours, 3) + self.assertEqual(t.minutes, 46) + self.assertEqual(t.seconds, 13) + self.assertEqual(t.millisecods, 93) + + @unittest.expectedFailure + def test_missing_comma(self): + fsub.TimeStamp('00:00:00000') + + @unittest.expectedFailure + def test_missing_zeros(self): + fsub.TimeStamp('0:0:00,00') + + def test_repr(self): + time = '03:46:13,093' + t = fsub.TimeStamp(time) + self.assertEqual(repr(t), time) + + def test_operations(self): + t1_str = '03:46:13,093' + t1 = fsub.TimeStamp(t1_str) + t2 = fsub.TimeStamp('07:39:50,920') + res = fsub.TimeStamp('11:26:04,013') + zero = fsub.TimeStamp('00:00:00,000') + + self.assertNotEqual(t1, t2) + self.assertLess(t1, t2) + self.assertGreater(t2, t1) + + t1 += t2 + + self.assertEqual(t1, res) + self.assertGreater(t1, t2) + self.assertLess(t2, t1) + + t1 += -t2 + + self.assertEqual(t1, fsub.TimeStamp(t1_str)) + self.assertLess(t1, t2) + self.assertGreater(t2, t1) + + t1 -= t1 + t = t2.time + t2 += t + t2 -= t + t2 -= t + + self.assertEqual(t1, zero) + self.assertEqual(t2, zero) + + +class TestSubtitle(unittest.TestCase): + sample_n = 10 + sample_start = '02:01:02,000' + sample_end = '02:02:00,000' + sample_content = \ + 'This is a test subtitle, which\n' + \ + 'may contain line breaks' + sample_sub = '{}\n{} --> {}\n{}' \ + .format(sample_n, sample_start, sample_end, sample_content) + sample_fname = 'some_file.srt' + sample_line = 30 + + def test_parse(self): + sub = fsub.Subtitle(self.sample_sub, + self.sample_fname, + self.sample_line) + + self.assertEqual(sub.number, self.sample_n) + self.assertEqual(repr(sub.time_start), self.sample_start) + self.assertEqual(repr(sub.time_end), self.sample_end) + self.assertEqual(len(sub), 4) + + for line in zip(self.sample_content.splitlines(), sub.content): + self.assertEqual(line[0], line[1]) + + def test_repr(self): + sub = fsub.Subtitle(self.sample_sub, + self.sample_fname, + self.sample_line) + self.assertEqual(repr(sub), self.sample_sub) + + def test_shift(self): + sub = fsub.Subtitle(self.sample_sub, + self.sample_fname, + self.sample_line) + start = fsub.TimeStamp(self.sample_start) + end = fsub.TimeStamp(self.sample_end) + # Some random amount + shift_by = 2327291392 + + sub.shift(shift_by) + start += shift_by + end += shift_by + self.assertEqual(sub.time_start, start) + self.assertEqual(sub.time_end, end) + + def test_replace(self): + sub = fsub.Subtitle(self.sample_sub, + self.sample_fname, + self.sample_line) + + sub.replace(re.compile('dummy str not in sub'), '') + + self.assertEqual(repr(sub), self.sample_sub) + + sub.replace(re.compile('is a test'), 'is not a test') + + self.assertNotEqual(repr(sub), self.sample_sub) + + def test_matches(self): + sub = fsub.Subtitle(self.sample_sub, + self.sample_fname, + self.sample_line) + + m1 = sub.matches(re.compile('dummy str not in sub')) + + self.assertFalse(m1) + + m2 = sub.matches(re.compile('is a test')) + + self.assertTrue(m2) + + @unittest.expectedFailure + def test_bad_number(self): + sub_str = """badnumber +02:01:02,000 --> 02:02:00,000 +This is a test subtitle, which +may contain line breaks""" + sys.stderr = io.StringIO() + + fsub.Subtitle(sub_str, + self.sample_fname, + self.sample_line) + + sys.stderr = sys.__stderr__ + + @unittest.expectedFailure + def test_neg_number(self): + sub_str = """-1 +02:01:02,000 --> 02:02:00,000 +This is a test subtitle, which +may contain line breaks""" + sys.stderr = io.StringIO() + + fsub.Subtitle(sub_str, + self.sample_fname, + self.sample_line) + + sys.stderr = sys.__stderr__ + + @unittest.expectedFailure + def test_bad_time_span(self): + sub_str = """1 +02:01:02,000 <-- 02:02:00,000 +This is a test subtitle, which +may contain line breaks""" + sys.stderr = io.StringIO() + + fsub.Subtitle(sub_str, + self.sample_fname, + self.sample_line) + + sys.stderr = sys.__stderr__ + + @unittest.expectedFailure + def test_inverted_time(self): + sub_str = """1 +12:01:02,000 --> 02:02:00,000 +This is a test subtitle, which +may contain line breaks""" + sys.stderr = io.StringIO() + + fsub.Subtitle(sub_str, + self.sample_fname, + self.sample_line) + + sys.stderr = sys.__stderr__ + + +if __name__ == '__main__': + unittest.main()