#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition, no-member
"""
Tests for StringPattern, RePattern and FunctionalPattern, and for the value, formatter and validator options.
"""

import re
import pytest

from ..pattern import StringPattern, RePattern, FunctionalPattern, REGEX_ENABLED
from ..match import Match


class TestStringPattern(object):
    """
    Tests for StringPattern matching
    """

    input_string = "An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on " \
                   "which were the Hebrew letter qoph."

    def test_single(self):
        """A single string yields one Match carrying the pattern, span and value."""
        pattern = StringPattern("Celtic")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_repr(self):
        """Check the repr of a StringPattern."""
        pattern = StringPattern("Celtic")

        # NOTE(review): the expected text was an empty string in the damaged source; it has been
        # reconstructed as an angle-bracketed repr - confirm against Pattern.__repr__.
        assert repr(pattern) == "<StringPattern:('Celtic',)>"

    def test_ignore_case(self):
        """A lowercase needle only matches "Celtic" when ignore_case is True."""
        pattern = StringPattern("celtic", ignore_case=False)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = StringPattern("celtic", ignore_case=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].value == "Celtic"

    def test_private_names(self):
        """A match whose name is listed in private_names is flagged private."""
        pattern = StringPattern("celtic", name="test", private_names=["test"], ignore_case=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].private

    def test_ignore_names(self):
        """No match is returned when the pattern name is listed in ignore_names."""
        pattern = StringPattern("celtic", name="test", ignore_names=["test"], ignore_case=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

    def test_no_match(self):
        """A string absent from the input yields no match."""
        pattern = StringPattern("Python")

        matches = list(pattern.matches(self.input_string))
        assert not matches

    def test_multiple_patterns(self):
        """Several strings in one pattern yield one match each."""
        pattern = StringPattern("playing", "annoyed", "Hebrew")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3

        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (18, 25)
        assert matches[0].value == "playing"

        assert isinstance(matches[1], Match)
        assert matches[1].pattern == pattern
        assert matches[1].span == (46, 53)
        assert matches[1].value == "annoyed"

        assert isinstance(matches[2], Match)
        assert matches[2].pattern == pattern
        assert matches[2].span == (88, 94)
        assert matches[2].value == "Hebrew"

    def test_start_end_kwargs(self):
        """Expect no match for "Abyssinian" when start=20 and end=40 are given."""
        pattern = StringPattern("Abyssinian", start=20, end=40)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

    def test_matches_kwargs(self):
        """The name and value keyword arguments end up on the produced match."""
        pattern = StringPattern("Abyssinian", name="test", value="AB")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "AB"

    def test_post_processor(self):
        """The post_processor receives the matches and the pattern; its return value replaces the matches."""
        def post_processor(matches, pattern):
            assert len(matches) == 1
            assert isinstance(pattern, StringPattern)

            # Returning an empty list is expected to leave no match at all.
            return []

        pattern = StringPattern("Abyssinian", name="test", value="AB", post_processor=post_processor)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0


class TestRePattern(object):
    """
    Tests for RePattern matching
    """

    input_string = "An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on " \
                   "which were the Hebrew letter qoph."

    def test_single_compiled(self):
        """A pre-compiled regular expression object is accepted as a pattern."""
        pattern = RePattern(re.compile("Celt.?c"))

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_single_string(self):
        """A regular expression given as a plain string is accepted as a pattern."""
        pattern = RePattern("Celt.?c")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_single_kwargs(self):
        """A dict with "pattern" and "flags" keys is accepted as a pattern."""
        pattern = RePattern({"pattern": "celt.?c", "flags": re.IGNORECASE})

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_single_vargs(self):
        """A (pattern, flags) tuple is accepted as a pattern."""
        pattern = RePattern(("celt.?c", re.IGNORECASE))

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (28, 34)
        assert matches[0].value == "Celtic"

    def test_no_match(self):
        """A regular expression that matches nothing yields no match."""
        pattern = RePattern("abc.?def")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

    def test_shortcuts(self):
        """With the abbreviation ("-", r"[\\W_]+"), "Celtic-violin" matches "Celtic violin"."""
        pattern = RePattern("Celtic-violin", abbreviations=[("-", r"[\W_]+")])

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        pattern = RePattern({"pattern": "celtic-violin", "flags": re.IGNORECASE}, abbreviations=[("-", r"[\W_]+")])

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_multiple_patterns(self):
        """Several regular expressions in one pattern yield one match each."""
        pattern = RePattern("pla.?ing", "ann.?yed", "Heb.?ew")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3

        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (18, 25)
        assert matches[0].value == "playing"

        assert isinstance(matches[1], Match)
        assert matches[1].pattern == pattern
        assert matches[1].span == (46, 53)
        assert matches[1].value == "annoyed"

        assert isinstance(matches[2], Match)
        assert matches[2].pattern == pattern
        assert matches[2].span == (88, 94)
        assert matches[2].value == "Hebrew"

    def test_unnamed_groups(self):
        """Unnamed capturing groups become unnamed children of the parent match."""
        pattern = RePattern(r"(Celt.?c)\s+(\w+)")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]

        assert isinstance(parent, Match)
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert len(parent.children) == 2

        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name is None
        assert group1.value == "Celtic"
        assert group1.parent == parent

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name is None
        assert group2.value == "violin"
        assert group2.parent == parent

    def test_named_groups(self):
        """Named capturing groups become children named after their group."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]

        assert isinstance(parent, Match)
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert len(parent.children) == 2

        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"
        assert group1.parent == parent

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"
        assert group2.parent == parent

    def test_children(self):
        """With children=True, the two group matches are returned instead of the parent."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 2
        group1, group2 = matches

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_children_parent_private(self):
        """With children=True and private_parent=True, the parent is returned too, flagged private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", children=True, private_parent=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        parent, group1, group2 = matches

        assert isinstance(parent, Match)
        assert parent.private
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert isinstance(group1, Match)
        assert not group1.private
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert not group2.private
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_parent_children_private(self):
        """With private_children=True, parent and children are returned; only the children are private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        parent, group1, group2 = matches

        assert isinstance(parent, Match)
        assert not parent.private
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert isinstance(group1, Match)
        assert group1.private
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert group2.private
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_every(self):
        """With every=True, parent and children are all returned and none is private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", every=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        parent, group1, group2 = matches

        assert isinstance(parent, Match)
        assert not parent.private
        assert parent.pattern == pattern
        assert parent.span == (28, 41)
        assert parent.name is None
        assert parent.value == "Celtic violin"

        assert isinstance(group1, Match)
        assert not group1.private
        assert group1.pattern == pattern
        assert group1.span == (28, 34)
        assert group1.name == "param1"
        assert group1.value == "Celtic"

        assert isinstance(group2, Match)
        assert not group2.private
        assert group2.pattern == pattern
        assert group2.span == (35, 41)
        assert group2.name == "param2"
        assert group2.value == "violin"

    def test_private_names(self):
        """Only the child whose group name is listed in private_names is flagged private."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", private_names=["param2"], children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 2
        assert matches[0].name == "param1"
        assert not matches[0].private
        assert matches[1].name == "param2"
        assert matches[1].private

    def test_ignore_names(self):
        """The child whose group name is listed in ignore_names is not returned."""
        pattern = RePattern(r"(?P<param1>Celt.?c)\s+(?P<param2>\w+)", ignore_names=["param2"], children=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "param1"

    def test_matches_kwargs(self):
        """name/value keyword arguments apply to the match and its children; group names win over name."""
        pattern = RePattern("He.rew", name="test", value="HE")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "HE"

        # Unnamed groups: children take both the name and the value of the pattern.
        pattern = RePattern("H(e.)(rew)", name="test", value="HE")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "HE"

        children = matches[0].children
        assert len(children) == 2
        assert children[0].name == "test"
        assert children[0].value == "HE"

        assert children[1].name == "test"
        assert children[1].value == "HE"

        # Named groups: children keep their group name but still take the pattern value.
        pattern = RePattern("H(?P<first>e.)(?P<second>rew)", name="test", value="HE")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "HE"

        children = matches[0].children
        assert len(children) == 2
        assert children[0].name == "first"
        assert children[0].value == "HE"

        assert children[1].name == "second"
        assert children[1].value == "HE"


class TestFunctionalPattern(object):
    """
    Tests for FunctionalPattern matching
    """

    input_string = "An Abyssinian fly playing a Celtic violin was annoyed by trashy flags on " \
                   "which were the Hebrew letter qoph."

    def test_single_vargs(self):
        """A function may return a (start, end, value, name) tuple."""
        def func(input_string):
            i = input_string.find("fly")
            if i > -1:
                return i, i + len("fly"), "fly", "functional"

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (14, 17)
        assert matches[0].name == "functional"
        assert matches[0].value == "fly"

    def test_single_kwargs(self):
        """A function may return a dict with "start", "end" and "name" keys."""
        def func(input_string):
            i = input_string.find("fly")
            if i > -1:
                return {"start": i, "end": i + len("fly"), "name": "functional"}

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (14, 17)
        assert matches[0].name == "functional"
        assert matches[0].value == "fly"

    def test_multiple_objects(self):
        """A function may return a list mixing tuples, tuples ending with a dict, and dicts."""
        def func(input_string):
            i = input_string.find("fly")
            matches = []
            if i > -1:
                matches.append((i, i + len("fly"), {'name': "functional"}))
            i = input_string.find("annoyed")
            if i > -1:
                matches.append((i, i + len("annoyed")))
            i = input_string.find("Hebrew")
            if i > -1:
                matches.append({"start": i, "end": i + len("Hebrew")})
            return matches

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (14, 17)
        assert matches[0].name == "functional"
        assert matches[0].value == "fly"

        assert isinstance(matches[1], Match)
        assert matches[1].pattern == pattern
        assert matches[1].span == (46, 53)
        assert matches[1].value == "annoyed"

        assert isinstance(matches[2], Match)
        assert matches[2].pattern == pattern
        assert matches[2].span == (88, 94)
        assert matches[2].value == "Hebrew"

    def test_multiple_generator(self):
        """A function may be a generator yielding one match description at a time."""
        def func(input_string):
            i = input_string.find("fly")
            if i > -1:
                yield (i, i + len("fly"), {'name': "functional"})
            i = input_string.find("annoyed")
            if i > -1:
                yield (i, i + len("annoyed"))
            i = input_string.find("Hebrew")
            if i > -1:
                # Positional start combined with a keyword "end".
                yield (i, {"end": i + len("Hebrew")})

        pattern = FunctionalPattern(func)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (14, 17)
        assert matches[0].name == "functional"
        assert matches[0].value == "fly"

        assert isinstance(matches[1], Match)
        assert matches[1].pattern == pattern
        assert matches[1].span == (46, 53)
        assert matches[1].value == "annoyed"

        assert isinstance(matches[2], Match)
        assert matches[2].pattern == pattern
        assert matches[2].span == (88, 94)
        assert matches[2].value == "Hebrew"

    def test_no_match(self):
        """A function returning None yields no match."""
        pattern = FunctionalPattern(lambda x: None)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

    def test_multiple_patterns(self):
        """Several functions in one pattern yield one match each."""
        def playing(input_string):
            i = input_string.find("playing")
            if i > -1:
                return i, i + len("playing")

        def annoyed(input_string):
            i = input_string.find("annoyed")
            if i > -1:
                return i, i + len("annoyed")

        def hebrew(input_string):
            i = input_string.find("Hebrew")
            if i > -1:
                return i, i + len("Hebrew")

        pattern = FunctionalPattern(playing, annoyed, hebrew)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 3

        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (18, 25)
        assert matches[0].value == "playing"

        assert isinstance(matches[1], Match)
        assert matches[1].pattern == pattern
        assert matches[1].span == (46, 53)
        assert matches[1].value == "annoyed"

        assert isinstance(matches[2], Match)
        assert matches[2].pattern == pattern
        assert matches[2].span == (88, 94)
        assert matches[2].value == "Hebrew"

    def test_matches_kwargs(self):
        """The name and value keyword arguments end up on the produced match."""
        def playing(input_string):
            i = input_string.find("playing")
            if i > -1:
                return i, i + len("playing")

        pattern = FunctionalPattern(playing, name="test", value="PLAY")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert matches[0].name == "test"
        assert matches[0].value == "PLAY"


class TestValue(object):
    """
    Tests for value option
    """

    input_string = "This string contains 1849 a number"

    def test_str_value(self):
        """A string value replaces the matched text as the match value."""
        pattern = StringPattern("1849", name="dummy", value="test")

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (21, 25)
        assert matches[0].value == "test"

    def test_dict_child_value(self):
        """A dict value keyed by group name overrides that child only; the other child is formatted."""
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter={'intParam': lambda x: int(x) * 2,
                                       'strParam': lambda x: "really " + x},
                            format_all=True,
                            value={'intParam': 'INT_PARAM_VALUE'})

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]
        assert len(parent.children) == 2

        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (12, 20)
        assert group1.value == "really contains"

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (21, 25)
        assert group2.value == 'INT_PARAM_VALUE'

    def test_dict_default_value(self):
        """'__parent__' and '__children__' keys give values to the parent and to children without their own key."""
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter={'intParam': lambda x: int(x) * 2,
                                       'strParam': lambda x: "really " + x},
                            format_all=True,
                            value={'__children__': 'CHILD', 'strParam': 'STR_VALUE', '__parent__': 'PARENT'})

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]
        assert parent.value == "PARENT"
        assert len(parent.children) == 2

        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (12, 20)
        assert group1.value == "STR_VALUE"

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (21, 25)
        assert group2.value == "CHILD"


class TestFormatter(object):
    """
    Tests for formatter option
    """

    input_string = "This string contains 1849 a number"

    def test_single_string(self):
        """The formatter result becomes the value of a StringPattern match."""
        pattern = StringPattern("1849", name="dummy", formatter=lambda x: int(x) / 2)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (21, 25)
        assert matches[0].value == 1849 / 2

    def test_single_re_no_group(self):
        """The formatter result becomes the value of a group-less RePattern match."""
        pattern = RePattern(r"\d+", formatter=lambda x: int(x) * 2)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (21, 25)
        assert matches[0].value == 1849 * 2

    def test_single_re_named_groups(self):
        """A dict formatter keyed by group name formats each child with its own function."""
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            formatter={'intParam': lambda x: int(x) * 2,
                                       'strParam': lambda x: "really " + x}, format_all=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        parent = matches[0]
        assert len(parent.children) == 2

        group1, group2 = parent.children

        assert isinstance(group1, Match)
        assert group1.pattern == pattern
        assert group1.span == (12, 20)
        assert group1.value == "really contains"

        assert isinstance(group2, Match)
        assert group2.pattern == pattern
        assert group2.span == (21, 25)
        assert group2.value == 1849 * 2

    def test_repeated_captures_option(self):
        """The number of children for a repeated group depends on REGEX_ENABLED and repeated_captures."""
        pattern = RePattern(r"\[(\d+)\](?:-(\d+))*")

        matches = list(pattern.matches("[02]-03-04-05-06"))
        assert len(matches) == 1

        match = matches[0]
        if REGEX_ENABLED:
            # Every repetition of the second group is expected as its own child.
            assert len(match.children) == 5
            assert [child.value for child in match.children] == ["02", "03", "04", "05", "06"]
        else:
            # Only the last repetition of the second group is expected.
            assert len(match.children) == 2
            assert [child.value for child in match.children] == ["02", "06"]

            # Explicitly requesting repeated captures is expected to be rejected on this branch.
            with pytest.raises(NotImplementedError):
                RePattern(r"\[(\d+)\](?:-(\d+))*", repeated_captures=True)

        pattern = RePattern(r"\[(\d+)\](?:-(\d+))*", repeated_captures=False)

        matches = list(pattern.matches("[02]-03-04-05-06"))
        assert len(matches) == 1

        match = matches[0]
        assert len(match.children) == 2
        assert [child.value for child in match.children] == ["02", "06"]

    def test_single_functional(self):
        """The formatter result becomes the value of a FunctionalPattern match."""
        def digit(input_string):
            i = input_string.find("1849")
            if i > -1:
                return i, i + len("1849")

        pattern = FunctionalPattern(digit, formatter=lambda x: int(x) * 3)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        assert isinstance(matches[0], Match)
        assert matches[0].pattern == pattern
        assert matches[0].span == (21, 25)
        assert matches[0].value == 1849 * 3


class TestValidator(object):
    """
    Tests for validator option
    """

    input_string = "This string contains 1849 a number"

    @staticmethod
    def true_validator(match):
        """Validator that accepts the 1849 match used by these tests (value below 1850)."""
        return int(match.value) < 1850

    @staticmethod
    def false_validator(match):
        """Validator that rejects the 1849 match used by these tests (requires value >= 1850)."""
        return int(match.value) >= 1850

    def test_single_string(self):
        """A StringPattern match is kept or dropped according to the validator result."""
        pattern = StringPattern("1849", name="dummy", validator=self.false_validator)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = StringPattern("1849", validator=self.true_validator)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_single_re_no_group(self):
        """A group-less RePattern match is kept or dropped according to the validator result."""
        pattern = RePattern(r"\d+", validator=self.false_validator)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = RePattern(r"\d+", validator=self.true_validator)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_single_re_named_groups(self):
        """A dict validator keyed by group name decides whether the whole match is kept."""
        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            validator={'intParam': self.false_validator}, validate_all=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = RePattern(r"(?P<strParam>cont.?ins)\s+(?P<intParam>\d+)",
                            validator={'intParam': self.true_validator}, validate_all=True)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_validate_all(self):
        """With validate_all=True the validator also sees the unnamed parent match."""
        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int, validator=lambda match: match.value < 100,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int, validator=lambda match: match.value > 100,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

        def invalid_func(match):
            if match.name == 'intParam':
                return True
            # Reached for any match not named 'intParam'; the test expects this to reject the result.
            return match.value.startswith('abc')

        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int, validator=invalid_func, validate_all=True,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        def func(match):
            if match.name == 'intParam':
                return True
            # Reached for any match not named 'intParam'; the test expects this to accept the result.
            return match.value.startswith('contains')

        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int, validator=func, validate_all=True,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1

    def test_format_all(self):
        """With format_all=True the int formatter is expected to raise ValueError."""
        pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int,
                            children=True)
        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1
        for match in matches:
            assert match.value is not None

        # presumably int() is applied to the whole "contains 1849" match here - confirm against RePattern.
        with pytest.raises(ValueError):
            pattern = RePattern(r"contains (?P<intParam>\d+)", formatter=int, format_all=True)
            matches = list(pattern.matches(self.input_string))
            for match in matches:
                assert match.value is not None

    def test_single_functional(self):
        """A FunctionalPattern match is kept or dropped according to the validator result."""
        def digit(input_string):
            i = input_string.find("1849")
            if i > -1:
                return i, i + len("1849")

        pattern = FunctionalPattern(digit, validator=self.false_validator)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 0

        pattern = FunctionalPattern(digit, validator=self.true_validator)

        matches = list(pattern.matches(self.input_string))
        assert len(matches) == 1