#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unneeded-not, len-as-condition

import pytest

from ..match import Match, Matches
from ..pattern import StringPattern, RePattern
from ..formatters import formatters


class TestMatchClass(object):
    """Checks on individual ``Match`` objects."""

    def test_repr(self):
        """Compare ``repr`` of a bare match and of a fully decorated one."""
        # NOTE(review): both expected values are empty strings. It looks like
        # angle-bracketed text may have been stripped from these literals --
        # confirm against Match.__repr__ before relying on this test.
        bare = Match(1, 3, value="es")
        assert repr(bare) == ''

        decorated = Match(0, 4, value="test", private=True, name="abc", tags=['one', 'two'])
        assert repr(decorated) == ''

    def test_names(self):
        """``names`` of a parent is compared against the names of its children."""
        parent = Match(0, 10, name="test")
        for child_name in ("child1", "child2"):
            parent.children.append(Match(0, 10, name=child_name, parent=parent))

        assert set(parent.names) == {"child1", "child2"}

    def test_equality(self):
        """Two matches built from the same arguments hash and compare equal."""
        left = Match(1, 3, value="es")
        right = Match(1, 3, value="es")
        unrelated = object()

        assert hash(left) == hash(right)
        assert hash(left) != hash(unrelated)

        assert left == right
        # Deliberately ``not ... ==`` (rather than ``!=``) to exercise ``==``.
        assert not left == unrelated

    def test_inequality(self):
        """Matches differing by span or by value hash and compare unequal."""
        reference = Match(0, 2, value="te")
        other_span = Match(2, 4, value="st")
        other_value = Match(0, 2, value="other")
        unrelated = object()

        assert hash(reference) != hash(other_span)
        assert hash(reference) != hash(other_value)

        assert reference != unrelated
        assert reference != other_span
        assert reference != other_value

    def test_length(self):
        """``len`` is checked against the span width, not the value length."""
        four_wide = Match(0, 4, value="test")
        two_wide = Match(0, 2, value="spanIsUsed")

        assert len(four_wide) == 4
        assert len(two_wide) == 2

    def test_compare(self):
        """Ordering operators work between matches and raise for other objects."""
        earlier = Match(0, 2, value="te")
        later = Match(2, 4, value="st")
        unrelated = object()

        assert earlier < later
        assert earlier <= later

        assert later > earlier
        assert later >= earlier

        # The bare comparisons below are intentional: only the raised
        # exception matters, not the result.
        with pytest.raises(TypeError):
            earlier < unrelated

        with pytest.raises(TypeError):
            earlier <= unrelated

        with pytest.raises(TypeError):
            earlier > unrelated

        with pytest.raises(TypeError):
            earlier >= unrelated

    def test_value(self):
        """A value assigned after construction is read back unchanged."""
        target = Match(1, 3)
        target.value = "test"

        assert target.value == "test"


class TestMatchesClass(object):
    """Checks on the ``Matches`` container, using four shared fixtures."""

    match1 = Match(0, 2, value="te", name="start")
    match2 = Match(2, 3, value="s", tags="tag1")
    match3 = Match(3, 4, value="t", tags=["tag1", "tag2"])
    match4 = Match(2, 4, value="st", name="end")

    @staticmethod
    def _appended(*items):
        """Return a new ``Matches`` filled by appending *items* one at a time."""
        collection = Matches()
        for item in items:
            collection.append(item)
        return collection

    def test_tag(self):
        """Names and tags are exposed, and ``tagged``/``named`` select by them."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        assert "start" in matches.names
        assert "end" in matches.names

        assert "tag1" in matches.tags
        assert "tag2" in matches.tags

        assert self.match3.tagged("tag1")
        assert not self.match3.tagged("start")

        with_tag1 = matches.tagged("tag1")
        assert len(with_tag1) == 2
        assert with_tag1[0] == self.match2
        assert with_tag1[1] == self.match3

        with_tag2 = matches.tagged("tag2")
        assert len(with_tag2) == 1
        assert with_tag2[0] == self.match3

        named_start = matches.named("start")
        assert len(named_start) == 1
        assert named_start[0] == self.match1

        named_end = matches.named("end")
        assert len(named_end) == 1
        assert named_end[0] == self.match4

    def test_base(self):
        """Append, remove and clear, checking the start/end/range lookups."""
        matches = self._appended(self.match1)

        assert len(matches) == 1
        assert repr(matches) == repr([self.match1])
        assert list(matches.starting(0)) == [self.match1]
        assert list(matches.ending(2)) == [self.match1]

        for extra in (self.match2, self.match3, self.match4):
            matches.append(extra)

        assert len(matches) == 4
        assert list(matches.starting(2)) == [self.match2, self.match4]
        assert list(matches.starting(3)) == [self.match3]
        assert list(matches.ending(3)) == [self.match2]
        assert list(matches.ending(4)) == [self.match3, self.match4]
        assert list(matches.range()) == [self.match1, self.match2, self.match4, self.match3]
        assert list(matches.range(0)) == [self.match1, self.match2, self.match4, self.match3]
        assert list(matches.range(0, 3)) == [self.match1, self.match2, self.match4]
        assert list(matches.range(2, 3)) == [self.match2, self.match4]
        assert list(matches.range(3, 4)) == [self.match4, self.match3]

        matches.remove(self.match1)
        assert len(matches) == 3
        assert len(matches.starting(0)) == 0
        assert len(matches.ending(2)) == 0

        matches.clear()

        assert len(matches) == 0
        for start in (0, 2, 3):
            assert len(matches.starting(start)) == 0
        for end in (2, 3, 4):
            assert len(matches.ending(end)) == 0

    def test_get_slices(self):
        """Slicing yields a ``Matches`` holding the selected items."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        slice_matches = matches[1:3]

        assert isinstance(slice_matches, Matches)

        assert len(slice_matches) == 2
        assert slice_matches[0] == self.match2
        assert slice_matches[1] == self.match3

    def test_remove_slices(self):
        """Deleting a slice leaves only the items outside it."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        del matches[1:3]

        assert len(matches) == 2
        assert matches[0] == self.match1
        assert matches[1] == self.match4

    def test_set_slices(self):
        """Assigning to a slice replaces the items inside it."""
        matches = self._appended(self.match1, self.match2, self.match3, self.match4)

        matches[1:3] = self.match1, self.match4

        assert len(matches) == 4
        assert matches[0] == self.match1
        assert matches[1] == self.match1
        assert matches[2] == self.match4
        assert matches[3] == self.match4

    def test_set_index(self):
        """Assigning to an index replaces that single item."""
        matches = self._appended(self.match1, self.match2, self.match3)

        matches[1] = self.match4

        assert len(matches) == 3
        assert matches[0] == self.match1
        assert matches[1] == self.match4
        assert matches[2] == self.match3

    def test_constructor(self):
        """Items passed to the constructor are visible to start/end lookups."""
        matches = Matches([self.match1, self.match2, self.match3, self.match4])

        assert len(matches) == 4
        assert list(matches.starting(0)) == [self.match1]
        assert list(matches.ending(2)) == [self.match1]
        assert list(matches.starting(2)) == [self.match2, self.match4]
        assert list(matches.starting(3)) == [self.match3]
        assert list(matches.ending(3)) == [self.match2]
        assert list(matches.ending(4)) == [self.match3, self.match4]

    def test_constructor_kwargs(self):
        """Same as ``test_constructor``, additionally passing ``input_string``."""
        matches = Matches([self.match1, self.match2, self.match3, self.match4], input_string="test")

        assert len(matches) == 4
        assert matches.input_string == "test"
        assert list(matches.starting(0)) == [self.match1]
        assert list(matches.ending(2)) == [self.match1]
        assert list(matches.starting(2)) == [self.match2, self.match4]
        assert list(matches.starting(3)) == [self.match3]
        assert list(matches.ending(3)) == [self.match2]
        assert list(matches.ending(4)) == [self.match3, self.match4]

    def test_crop(self):
        """``crop`` is checked with matches, spans and lists of either."""
        input_string = "abcdefghijklmnopqrstuvwxyz"

        subject = Match(1, 10, input_string=input_string)
        head = Match(0, 2, input_string=input_string)
        tail = Match(8, 15, input_string=input_string)

        # Mixed list: one Match object and one raw span tuple.
        ret = subject.crop([head, tail.span])

        assert len(ret) == 1

        assert ret[0].span == (2, 8)
        assert ret[0].value == "cdefgh"

        ret = subject.crop((1, 10))
        assert len(ret) == 0

        ret = subject.crop((1, 3))
        assert len(ret) == 1
        assert ret[0].span == (3, 10)

        ret = subject.crop((7, 10))
        assert len(ret) == 1
        assert ret[0].span == (1, 7)

        ret = subject.crop((0, 12))
        assert len(ret) == 0

        ret = subject.crop((4, 6))
        assert len(ret) == 2

        assert ret[0].span == (1, 4)
        assert ret[1].span == (6, 10)

        ret = subject.crop([(3, 5), (7, 9)])
        assert len(ret) == 3

        assert ret[0].span == (1, 3)
        assert ret[1].span == (5, 7)
        assert ret[2].span == (9, 10)

    def test_split(self):
        """``split`` on separator characters yields the three words."""
        input_string = "123 +word1 - word2 + word3 456"
        subject = Match(3, len(input_string) - 3, input_string=input_string)

        pieces = subject.split(" -+")

        assert len(pieces) == 3
        assert [piece.value for piece in pieces] == ["word1", "word2", "word3"]


class TestMaches(object):
    """Checks combining ``Matches`` with patterns, formatters and holes."""

    @staticmethod
    def _numbered_matches(input_string):
        """Return ``Matches`` extended with str/re patterns for One, Two, Three.

        The specification is rebuilt on every call so each pattern receives
        its own fresh ``tags`` list.
        """
        specs = (
            (StringPattern, "One", "1-str", ["One", "str"]),
            (RePattern, "One", "1-re", ["One", "re"]),
            (StringPattern, "Two", "2-str", ["Two", "str"]),
            (RePattern, "Two", "2-re", ["Two", "re"]),
            (StringPattern, "Three", "3-str", ["Three", "str"]),
            (RePattern, "Three", "3-re", ["Three", "re"]),
        )
        collection = Matches()
        for pattern_class, text, name, tags in specs:
            collection.extend(pattern_class(text, name=name, tags=tags).matches(input_string))
        return collection

    def test_names(self):
        """``names`` is compared against the set of all pattern names."""
        matches = self._numbered_matches("One Two Three")

        assert set(matches.names) == {"1-str", "1-re", "2-str", "2-re", "3-str", "3-re"}

    def test_filters(self):
        """Exercise predicate and index arguments of the selection methods."""
        matches = self._numbered_matches("One Two Three")

        def has_str_tag(m):
            return "str" in m.tags

        def has_re_tag(m):
            return "re" in m.tags

        selection = matches.starting(0)
        assert len(selection) == 2

        selection = matches.starting(0, has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.ending(7, predicate=has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "2-str"

        selection = matches.previous(matches.named("2-str")[0])
        assert len(selection) == 2
        assert selection[0].pattern.name == "1-str"
        assert selection[1].pattern.name == "1-re"

        selection = matches.previous(matches.named("2-str", 0), has_str_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "1-str"

        selection = matches.next(matches.named("2-str", 0))
        assert len(selection) == 2
        assert selection[0].pattern.name == "3-str"
        assert selection[1].pattern.name == "3-re"

        # With an explicit index a single match is expected, not a list.
        selection = matches.next(matches.named("2-str", 0), index=0, predicate=has_re_tag)
        assert selection is not None
        assert selection.pattern.name == "3-re"

        selection = matches.next(matches.named("2-str", index=0), has_re_tag)
        assert len(selection) == 1
        assert selection[0].pattern.name == "3-re"

        selection = matches.named("2-str", has_re_tag)
        assert len(selection) == 0

        selection = matches.named("2-re", has_re_tag, 0)
        assert selection is not None
        assert selection.name == "2-re"  # pylint:disable=no-member

        selection = matches.named("2-re", has_re_tag)
        assert len(selection) == 1
        assert selection[0].name == "2-re"

        selection = matches.named("2-re", has_re_tag, index=1000)
        assert selection is None

    def test_raw(self):
        """``raw`` follows ``raw_start``/``raw_end``; ``value`` is formatted raw."""
        input_string = "0123456789"

        subject = Match(0, 10, input_string=input_string, formatter=lambda s: s*2)

        assert subject.value == subject.raw * 2
        assert subject.raw == input_string

        subject.raw_end = 9
        subject.raw_start = 1

        assert subject.value == subject.raw * 2
        assert subject.raw == input_string[1:9]

        subject.raw_end = None
        subject.raw_start = None

        assert subject.value == subject.raw * 2
        assert subject.raw == input_string

    def test_formatter_chain(self):
        """A ``formatters`` chain of int, doubling and +10 gives 100 * 2 + 10."""
        input_string = "100"

        chain = formatters(int, lambda s: s*2, lambda s: s+10)
        subject = Match(0, 3, input_string=input_string, formatter=chain)

        assert subject.raw == input_string
        assert subject.value == 100 * 2 + 10

    def test_to_dict(self):
        """``to_dict`` is checked with first_value, enforce_list and details."""
        input_string = "One Two Two Three"

        specs = (
            (StringPattern, "One", "1", ["One", "str"]),
            (RePattern, "One", "1", ["One", "re"]),
            (StringPattern, "Two", "2", ["Two", "str"]),
            (RePattern, "Two", "2", ["Two", "re"]),
            (RePattern, "Two", "2", ["Two", "reBis"]),
            (StringPattern, "Three", "3", ["Three", "str"]),
            (RePattern, "Three", "3bis", ["Three", "re"]),
        )

        matches = Matches()
        for pattern_class, text, name, tags in specs:
            matches.extend(pattern_class(text, name=name, tags=tags).matches(input_string))
        matches.extend(RePattern(r"(\w+)", name="words").matches(input_string))

        kvalues = matches.to_dict(first_value=True)
        assert kvalues == {"1": "One",
                           "2": "Two",
                           "3": "Three",
                           "3bis": "Three",
                           "words": "One"}
        assert kvalues.values_list["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(enforce_list=True)
        assert kvalues["words"] == ["One", "Two", "Three"]

        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        assert len(kvalues["2"]) == 2
        assert kvalues["2"][0].value == "Two"
        assert kvalues["2"][1].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        assert len(kvalues["words"]) == 4
        assert kvalues["words"][0].value == "One"
        assert kvalues["words"][1].value == "Two"
        assert kvalues["words"][2].value == "Two"
        assert kvalues["words"][3].value == "Three"

        # NOTE(review): same arguments as the call just above; this second
        # pass reads through ``values_list`` instead -- confirm whether a
        # different option combination was intended here.
        kvalues = matches.to_dict(details=True)
        assert kvalues["1"].value == "One"

        assert len(kvalues.values_list["2"]) == 2
        assert kvalues.values_list["2"][0].value == "Two"
        assert kvalues.values_list["2"][1].value == "Two"

        assert kvalues["3"].value == "Three"
        assert kvalues["3bis"].value == "Three"

        assert len(kvalues.values_list["words"]) == 4
        assert kvalues.values_list["words"][0].value == "One"
        assert kvalues.values_list["words"][1].value == "Two"
        assert kvalues.values_list["words"][2].value == "Two"
        assert kvalues.values_list["words"][3].value == "Three"

    def test_chains(self):
        """``chain_before``/``chain_after`` with positions, matches and predicates."""
        input_string = "wordX 10 20 30 40 wordA, wordB, wordC 70 80 wordX"

        matches = Matches(input_string=input_string)

        matches.extend(RePattern(r"\d+", name="digit").matches(input_string))
        matches.extend(RePattern("[a-zA-Z]+", name="word").matches(input_string))

        assert len(matches) == 11

        def is_word(match):
            return match.name == "word"

        def is_digit(match):
            return match.name == "digit"

        a_start = input_string.find('wordA')

        b_start = input_string.find('wordB')
        b_end = b_start + len('wordB')

        c_start = input_string.find('wordC')
        c_end = c_start + len('wordC')

        chain_before = matches.chain_before(b_start, " ,", predicate=is_word)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        # Same query, anchored with an empty Match instead of a position.
        chain_before = matches.chain_before(Match(b_start, b_start), " ,", predicate=is_word)
        assert len(chain_before) == 1
        assert chain_before[0].value == 'wordA'

        chain_before = matches.chain_before(b_start, " ,", predicate=is_digit)
        assert len(chain_before) == 0

        chain_before = matches.chain_before(a_start, " ,", predicate=is_digit)
        assert len(chain_before) == 4
        assert [match.value for match in chain_before] == ["40", "30", "20", "10"]

        chain_after = matches.chain_after(b_end, " ,", predicate=is_word)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(Match(b_end, b_end), " ,", predicate=is_word)
        assert len(chain_after) == 1
        assert chain_after[0].value == 'wordC'

        chain_after = matches.chain_after(b_end, " ,", predicate=is_digit)
        assert len(chain_after) == 0

        chain_after = matches.chain_after(c_end, " ,", predicate=is_digit)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

        chain_after = matches.chain_after(c_end, " ,", end=10000, predicate=is_digit)
        assert len(chain_after) == 2
        assert [match.value for match in chain_after] == ["70", "80"]

    def test_holes(self):
        """``holes`` with no arguments, a range, a formatter and a predicate."""
        # Seven runs of ten identical digits: '1' * 10 through '7' * 10.
        input_string = "".join(digit * 10 for digit in "1234567")

        hole1 = Match(0, 10, input_string=input_string)
        hole2 = Match(20, 30, input_string=input_string)
        hole3 = Match(30, 40, input_string=input_string)
        hole4 = Match(60, 70, input_string=input_string)

        matches = Matches([hole1, hole2], input_string=input_string)
        for later in (hole3, hole4):
            matches.append(later)

        holes = list(matches.holes())
        assert len(holes) == 2
        assert holes[0].span == (10, 20)
        assert holes[0].value == '2'*10
        assert holes[1].span == (40, 60)
        assert holes[1].value == '5' * 10 + '6' * 10

        holes = list(matches.holes(5, 15))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == '2'*5

        holes = list(matches.holes(5, 15, formatter=lambda value: "formatted"))
        assert len(holes) == 1
        assert holes[0].span == (10, 15)
        assert holes[0].value == "formatted"

        holes = list(matches.holes(5, 15, predicate=lambda hole: False))
        assert len(holes) == 0

    def test_holes_empty(self):
        """With no matches, one hole is expected whose value is the whole input."""
        input_string = "Test hole on empty matches"
        matches = Matches(input_string=input_string)

        holes = matches.holes()

        assert len(holes) == 1
        assert holes[0].value == input_string

    def test_holes_seps(self):
        """Passing ``seps`` splits the holes further on those characters."""
        input_string = "Test hole - with many separators + included"
        found = StringPattern("many").matches(input_string)

        matches = Matches(found, input_string)

        holes = matches.holes()
        assert len(holes) == 2

        holes = matches.holes(seps="-+")
        assert len(holes) == 4
        assert [hole.value for hole in holes] == ["Test hole ", " with ", " separators ", " included"]