You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
566 lines
20 KiB
566 lines
20 KiB
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# pylint: disable=pointless-statement, missing-docstring, unneeded-not, len-as-condition
|
|
|
|
import pytest
|
|
|
|
from ..match import Match, Matches
|
|
from ..pattern import StringPattern, RePattern
|
|
from ..formatters import formatters
|
|
|
|
|
|
class TestMatchClass:
    """Checks on a single ``Match``: repr, names, hashing, ordering, length, value."""

    def test_repr(self):
        """repr() of a bare match, then of one with private flag, name and tags."""
        plain = Match(1, 3, value="es")
        assert repr(plain) == "<es:(1, 3)>"

        decorated = Match(0, 4, value="test", private=True, name="abc", tags=["one", "two"])
        expected = "<test:(0, 4)+private+name=abc+tags=['one', 'two']>"
        assert repr(decorated) == expected

    def test_names(self):
        """``names`` on a parent reports the names of its appended children."""
        parent = Match(0, 10, name="test")
        for child_name in ("child1", "child2"):
            parent.children.append(Match(0, 10, name=child_name, parent=parent))

        assert set(parent.names) == {"child1", "child2"}

    def test_equality(self):
        """Matches with the same span and value are equal and share a hash."""
        left = Match(1, 3, value="es")
        right = Match(1, 3, value="es")
        stranger = object()

        assert hash(left) == hash(right)
        assert hash(left) != hash(stranger)

        assert left == right
        # Deliberately spelled "not ... ==" so that __eq__ is the operator under test.
        assert not left == stranger

    def test_inequality(self):
        """Matches differing by span or by value are unequal and hash differently."""
        reference = Match(0, 2, value="te")
        other_span = Match(2, 4, value="st")
        other_value = Match(0, 2, value="other")
        stranger = object()

        assert hash(reference) != hash(other_span)
        assert hash(reference) != hash(other_value)

        assert reference != stranger
        assert reference != other_span
        assert reference != other_value

    def test_length(self):
        """len() follows the span of the match, not the length of its value."""
        four_wide = Match(0, 4, value="test")
        two_wide = Match(0, 2, value="spanIsUsed")

        assert len(four_wide) == 4
        assert len(two_wide) == 2

    def test_compare(self):
        """Ordering operators work between matches and raise TypeError otherwise."""
        earlier = Match(0, 2, value="te")
        later = Match(2, 4, value="st")
        stranger = object()

        assert earlier < later
        assert earlier <= later

        assert later > earlier
        assert later >= earlier

        # Each operator is checked in its own context so that one failure
        # cannot hide another.
        with pytest.raises(TypeError):
            earlier < stranger

        with pytest.raises(TypeError):
            earlier <= stranger

        with pytest.raises(TypeError):
            earlier > stranger

        with pytest.raises(TypeError):
            earlier >= stranger

    def test_value(self):
        """A value assigned after construction is read back unchanged."""
        subject = Match(1, 3)
        subject.value = "test"

        assert subject.value == "test"
|
|
|
|
|
|
class TestMatchesClass:
    """Checks on the ``Matches`` container using four shared fixture matches."""

    # Shared fixtures: together they cover the span (0, 4), and match4
    # overlaps both match2 and match3.
    match1 = Match(0, 2, value="te", name="start")
    match2 = Match(2, 3, value="s", tags="tag1")
    match3 = Match(3, 4, value="t", tags=["tag1", "tag2"])
    match4 = Match(2, 4, value="st", name="end")

    def test_tag(self):
        """Names and tags are indexed by the container and can be queried."""
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        for expected_name in ("start", "end"):
            assert expected_name in matches.names

        for expected_tag in ("tag1", "tag2"):
            assert expected_tag in matches.tags

        assert self.match3.tagged("tag1")
        assert not self.match3.tagged("start")

        with_tag1 = matches.tagged("tag1")
        assert len(with_tag1) == 2
        assert with_tag1[0] == self.match2
        assert with_tag1[1] == self.match3

        with_tag2 = matches.tagged("tag2")
        assert len(with_tag2) == 1
        assert with_tag2[0] == self.match3

        named_start = matches.named("start")
        assert len(named_start) == 1
        assert named_start[0] == self.match1

        named_end = matches.named("end")
        assert len(named_end) == 1
        assert named_end[0] == self.match4

    def test_base(self):
        """append/remove/clear keep the starting, ending and range lookups in sync."""
        first, second, third, fourth = self.match1, self.match2, self.match3, self.match4

        matches = Matches()
        matches.append(first)

        assert len(matches) == 1
        assert repr(matches) == repr([first])
        assert list(matches.starting(0)) == [first]
        assert list(matches.ending(2)) == [first]

        for fixture in (second, third, fourth):
            matches.append(fixture)

        assert len(matches) == 4
        assert list(matches.starting(2)) == [second, fourth]
        assert list(matches.starting(3)) == [third]
        assert list(matches.ending(3)) == [second]
        assert list(matches.ending(4)) == [third, fourth]
        assert list(matches.range()) == [first, second, fourth, third]
        assert list(matches.range(0)) == [first, second, fourth, third]
        assert list(matches.range(0, 3)) == [first, second, fourth]
        assert list(matches.range(2, 3)) == [second, fourth]
        assert list(matches.range(3, 4)) == [fourth, third]

        matches.remove(first)
        assert len(matches) == 3
        assert len(matches.starting(0)) == 0
        assert len(matches.ending(2)) == 0

        matches.clear()

        assert len(matches) == 0
        for position in (0, 2, 3):
            assert len(matches.starting(position)) == 0
        for position in (2, 3, 4):
            assert len(matches.ending(position)) == 0

    def test_get_slices(self):
        """Slicing returns a ``Matches`` holding the selected items."""
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        middle = matches[1:3]

        assert isinstance(middle, Matches)

        assert len(middle) == 2
        assert middle[0] == self.match2
        assert middle[1] == self.match3

    def test_remove_slices(self):
        """Deleting a slice drops exactly the items inside it."""
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        del matches[1:3]

        assert len(matches) == 2
        assert matches[0] == self.match1
        assert matches[1] == self.match4

    def test_set_slices(self):
        """Assigning to a slice replaces the items inside it."""
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3, self.match4):
            matches.append(fixture)

        matches[1:3] = (self.match1, self.match4)

        assert len(matches) == 4
        assert matches[0] == self.match1
        assert matches[1] == self.match1
        assert matches[2] == self.match4
        assert matches[3] == self.match4

    def test_set_index(self):
        """Assigning to an index replaces that single item."""
        matches = Matches()
        for fixture in (self.match1, self.match2, self.match3):
            matches.append(fixture)

        matches[1] = self.match4

        assert len(matches) == 3
        assert matches[0] == self.match1
        assert matches[1] == self.match4
        assert matches[2] == self.match3

    def test_constructor(self):
        """Matches passed to the constructor are indexed like appended ones."""
        first, second, third, fourth = self.match1, self.match2, self.match3, self.match4

        matches = Matches([first, second, third, fourth])

        assert len(matches) == 4
        assert list(matches.starting(0)) == [first]
        assert list(matches.ending(2)) == [first]
        assert list(matches.starting(2)) == [second, fourth]
        assert list(matches.starting(3)) == [third]
        assert list(matches.ending(3)) == [second]
        assert list(matches.ending(4)) == [third, fourth]

    def test_constructor_kwargs(self):
        """``input_string`` given to the constructor is stored alongside the matches."""
        first, second, third, fourth = self.match1, self.match2, self.match3, self.match4

        matches = Matches([first, second, third, fourth], input_string="test")

        assert len(matches) == 4
        assert matches.input_string == "test"
        assert list(matches.starting(0)) == [first]
        assert list(matches.ending(2)) == [first]
        assert list(matches.starting(2)) == [second, fourth]
        assert list(matches.starting(3)) == [third]
        assert list(matches.ending(3)) == [second]
        assert list(matches.ending(4)) == [third, fourth]

    def test_crop(self):
        """crop() removes the given spans or matches and returns what is left."""
        alphabet = "abcdefghijklmnopqrstuvwxyz"

        subject = Match(1, 10, input_string=alphabet)
        head = Match(0, 2, input_string=alphabet)
        tail = Match(8, 15, input_string=alphabet)

        # A Match and a bare span tuple can be mixed in the same crop list.
        remaining = subject.crop([head, tail.span])

        assert len(remaining) == 1

        assert remaining[0].span == (2, 8)
        assert remaining[0].value == "cdefgh"

        # Cropping the exact span leaves nothing.
        remaining = subject.crop((1, 10))
        assert len(remaining) == 0

        # Cropping at the start keeps the end.
        remaining = subject.crop((1, 3))
        assert len(remaining) == 1
        assert remaining[0].span == (3, 10)

        # Cropping at the end keeps the start.
        remaining = subject.crop((7, 10))
        assert len(remaining) == 1
        assert remaining[0].span == (1, 7)

        # Cropping a wider span leaves nothing.
        remaining = subject.crop((0, 12))
        assert len(remaining) == 0

        # Cropping in the middle splits the match in two.
        remaining = subject.crop((4, 6))
        assert len(remaining) == 2

        assert remaining[0].span == (1, 4)
        assert remaining[1].span == (6, 10)

        # Two inner crops leave three pieces.
        remaining = subject.crop([(3, 5), (7, 9)])
        assert len(remaining) == 3

        assert remaining[0].span == (1, 3)
        assert remaining[1].span == (5, 7)
        assert remaining[2].span == (9, 10)

    def test_split(self):
        """split() cuts a match on separator characters and drops empty pieces."""
        text = "123 +word1 - word2 + word3 456"
        subject = Match(3, len(text) - 3, input_string=text)
        pieces = subject.split(" -+")

        assert len(pieces) == 3
        assert [piece.value for piece in pieces] == ["word1", "word2", "word3"]
|
|
|
|
|
|
class TestMaches:
    """Checks on ``Matches`` filled from patterns, plus raw/formatter behaviour of ``Match``."""

    def test_names(self):
        """Every pattern name ends up in ``matches.names``."""
        text = "One Two Three"
        sources = (
            StringPattern("One", name="1-str", tags=["One", "str"]),
            RePattern("One", name="1-re", tags=["One", "re"]),
            StringPattern("Two", name="2-str", tags=["Two", "str"]),
            RePattern("Two", name="2-re", tags=["Two", "re"]),
            StringPattern("Three", name="3-str", tags=["Three", "str"]),
            RePattern("Three", name="3-re", tags=["Three", "re"]),
        )

        matches = Matches()
        for source in sources:
            matches.extend(source.matches(text))

        assert set(matches.names) == {"1-str", "1-re", "2-str", "2-re", "3-str", "3-re"}

    def test_filters(self):
        """starting/ending/previous/next/named accept predicate and index filters."""
        text = "One Two Three"
        sources = (
            StringPattern("One", name="1-str", tags=["One", "str"]),
            RePattern("One", name="1-re", tags=["One", "re"]),
            StringPattern("Two", name="2-str", tags=["Two", "str"]),
            RePattern("Two", name="2-re", tags=["Two", "re"]),
            StringPattern("Three", name="3-str", tags=["Three", "str"]),
            RePattern("Three", name="3-re", tags=["Three", "re"]),
        )

        matches = Matches()
        for source in sources:
            matches.extend(source.matches(text))

        def has_str_tag(candidate):
            return "str" in candidate.tags

        def has_re_tag(candidate):
            return "re" in candidate.tags

        # The positional/keyword shape of each call below is part of what is tested.
        found = matches.starting(0)
        assert len(found) == 2

        found = matches.starting(0, has_str_tag)
        assert len(found) == 1
        assert found[0].pattern.name == "1-str"

        found = matches.ending(7, predicate=has_str_tag)
        assert len(found) == 1
        assert found[0].pattern.name == "2-str"

        found = matches.previous(matches.named("2-str")[0])
        assert len(found) == 2
        assert found[0].pattern.name == "1-str"
        assert found[1].pattern.name == "1-re"

        found = matches.previous(matches.named("2-str", 0), has_str_tag)
        assert len(found) == 1
        assert found[0].pattern.name == "1-str"

        found = matches.next(matches.named("2-str", 0))
        assert len(found) == 2
        assert found[0].pattern.name == "3-str"
        assert found[1].pattern.name == "3-re"

        # With an index, a single match (or None) is returned instead of a list.
        single = matches.next(matches.named("2-str", 0), index=0, predicate=has_re_tag)
        assert single is not None
        assert single.pattern.name == "3-re"

        found = matches.next(matches.named("2-str", index=0), has_re_tag)
        assert len(found) == 1
        assert found[0].pattern.name == "3-re"

        found = matches.named("2-str", has_re_tag)
        assert len(found) == 0

        single = matches.named("2-re", has_re_tag, 0)
        assert single is not None
        assert single.name == "2-re"  # pylint:disable=no-member

        found = matches.named("2-re", has_re_tag)
        assert len(found) == 1
        assert found[0].name == "2-re"

        single = matches.named("2-re", has_re_tag, index=1000)
        assert single is None

    def test_raw(self):
        """``raw`` follows raw_start/raw_end and ``value`` is the formatted raw text."""
        text = "0123456789"

        def doubled(raw_text):
            return raw_text * 2

        subject = Match(0, 10, input_string=text, formatter=doubled)

        assert subject.value == subject.raw * 2
        assert subject.raw == text

        subject.raw_end = 9
        subject.raw_start = 1

        assert subject.value == subject.raw * 2
        assert subject.raw == text[1:9]

        # Resetting the raw bounds to None restores the full span.
        subject.raw_end = None
        subject.raw_start = None

        assert subject.value == subject.raw * 2
        assert subject.raw == text

    def test_formatter_chain(self):
        """formatters() chains callables: int, then doubling, then adding ten."""
        text = "100"

        def doubled(number):
            return number * 2

        def plus_ten(number):
            return number + 10

        chain = formatters(int, doubled, plus_ten)
        subject = Match(0, 3, input_string=text, formatter=chain)

        assert subject.raw == text
        assert subject.value == 100 * 2 + 10

    def test_to_dict(self):
        """to_dict() with first_value, enforce_list and details options."""
        text = "One Two Two Three"
        sources = (
            StringPattern("One", name="1", tags=["One", "str"]),
            RePattern("One", name="1", tags=["One", "re"]),
            StringPattern("Two", name="2", tags=["Two", "str"]),
            RePattern("Two", name="2", tags=["Two", "re"]),
            RePattern("Two", name="2", tags=["Two", "reBis"]),
            StringPattern("Three", name="3", tags=["Three", "str"]),
            RePattern("Three", name="3bis", tags=["Three", "re"]),
            RePattern(r"(\w+)", name="words"),
        )

        matches = Matches()
        for source in sources:
            matches.extend(source.matches(text))

        by_name = matches.to_dict(first_value=True)
        assert by_name == {
            "1": "One",
            "2": "Two",
            "3": "Three",
            "3bis": "Three",
            "words": "One",
        }
        assert by_name.values_list["words"] == ["One", "Two", "Three"]

        by_name = matches.to_dict(enforce_list=True)
        assert by_name["words"] == ["One", "Two", "Three"]

        expected_words = ("One", "Two", "Two", "Three")

        # details=True: entries are match objects, read through item access.
        detailed = matches.to_dict(details=True)
        assert detailed["1"].value == "One"

        assert len(detailed["2"]) == 2
        assert detailed["2"][0].value == "Two"
        assert detailed["2"][1].value == "Two"

        assert detailed["3"].value == "Three"
        assert detailed["3bis"].value == "Three"

        assert len(detailed["words"]) == 4
        for position, word in enumerate(expected_words):
            assert detailed["words"][position].value == word

        # details=True again: same data read through ``values_list``.
        detailed = matches.to_dict(details=True)
        assert detailed["1"].value == "One"

        assert len(detailed.values_list["2"]) == 2
        assert detailed.values_list["2"][0].value == "Two"
        assert detailed.values_list["2"][1].value == "Two"

        assert detailed["3"].value == "Three"
        assert detailed["3bis"].value == "Three"

        assert len(detailed.values_list["words"]) == 4
        for position, word in enumerate(expected_words):
            assert detailed.values_list["words"][position].value == word

    def test_chains(self):
        """chain_before/chain_after collect separator-linked matches around a position."""
        text = "wordX 10 20 30 40 wordA, wordB, wordC 70 80 wordX"
        separators = " ,"

        matches = Matches(input_string=text)
        matches.extend(RePattern(r"\d+", name="digit").matches(text))
        matches.extend(RePattern("[a-zA-Z]+", name="word").matches(text))

        assert len(matches) == 11

        def is_word(candidate):
            return candidate.name == "word"

        def is_digit(candidate):
            return candidate.name == "digit"

        a_start = text.find("wordA")

        b_start = text.find("wordB")
        b_end = b_start + len("wordB")

        c_start = text.find("wordC")
        c_end = c_start + len("wordC")

        before = matches.chain_before(b_start, separators, predicate=is_word)
        assert len(before) == 1
        assert before[0].value == "wordA"

        # A zero-width Match is accepted in place of an integer position.
        before = matches.chain_before(Match(b_start, b_start), separators, predicate=is_word)
        assert len(before) == 1
        assert before[0].value == "wordA"

        before = matches.chain_before(b_start, separators, predicate=is_digit)
        assert len(before) == 0

        before = matches.chain_before(a_start, separators, predicate=is_digit)
        assert len(before) == 4
        assert [link.value for link in before] == ["40", "30", "20", "10"]

        after = matches.chain_after(b_end, separators, predicate=is_word)
        assert len(after) == 1
        assert after[0].value == "wordC"

        after = matches.chain_after(Match(b_end, b_end), separators, predicate=is_word)
        assert len(after) == 1
        assert after[0].value == "wordC"

        after = matches.chain_after(b_end, separators, predicate=is_digit)
        assert len(after) == 0

        after = matches.chain_after(c_end, separators, predicate=is_digit)
        assert len(after) == 2
        assert [link.value for link in after] == ["70", "80"]

        # An ``end`` beyond the input length gives the same result.
        after = matches.chain_after(c_end, separators, end=10000, predicate=is_digit)
        assert len(after) == 2
        assert [link.value for link in after] == ["70", "80"]

    def test_holes(self):
        """holes() yields the unmatched spans, honouring bounds, formatter and predicate."""
        # Seven runs of ten identical digits: "1" * 10 through "7" * 10.
        text = "".join(digit * 10 for digit in "1234567")

        covered_a = Match(0, 10, input_string=text)
        covered_b = Match(20, 30, input_string=text)
        covered_c = Match(30, 40, input_string=text)
        covered_d = Match(60, 70, input_string=text)

        matches = Matches([covered_a, covered_b], input_string=text)
        matches.append(covered_c)
        matches.append(covered_d)

        gaps = list(matches.holes())
        assert len(gaps) == 2
        assert gaps[0].span == (10, 20)
        assert gaps[0].value == "2" * 10
        assert gaps[1].span == (40, 60)
        assert gaps[1].value == "5" * 10 + "6" * 10

        gaps = list(matches.holes(5, 15))
        assert len(gaps) == 1
        assert gaps[0].span == (10, 15)
        assert gaps[0].value == "2" * 5

        def always_formatted(value):
            return "formatted"

        gaps = list(matches.holes(5, 15, formatter=always_formatted))
        assert len(gaps) == 1
        assert gaps[0].span == (10, 15)
        assert gaps[0].value == "formatted"

        def reject_all(hole):
            return False

        gaps = list(matches.holes(5, 15, predicate=reject_all))
        assert len(gaps) == 0

    def test_holes_empty(self):
        """With no matches at all, the whole input string is one hole."""
        text = "Test hole on empty matches"
        matches = Matches(input_string=text)
        gaps = matches.holes()
        assert len(gaps) == 1
        assert gaps[0].value == text

    def test_holes_seps(self):
        """``seps`` splits holes further on the given separator characters."""
        text = "Test hole - with many separators + included"
        found = StringPattern("many").matches(text)

        matches = Matches(found, text)
        gaps = matches.holes()

        assert len(gaps) == 2

        gaps = matches.holes(seps="-+")

        assert len(gaps) == 4
        assert [gap.value for gap in gaps] == ["Test hole ", " with ", " separators ", " included"]
|