You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
157 lines
6.2 KiB
157 lines
6.2 KiB
3 years ago
|
import re
|
||
|
from collections import defaultdict
|
||
|
from enum import Enum
|
||
|
import requests
|
||
|
|
||
3 years ago
|
from app.profile_data import ProfileData
|
||
3 years ago
|
|
||
|
class TermCategory(Enum):
    """The bucket a release-profile term is filed under."""

    Preferred = 1
    Required = 2
    Ignored = 3


# A markdown header: group 1 is the run of '#', group 2 is the header text.
header_regex = re.compile(r'^(#+)\s([\w\s\d]+)\s*$')

# A score mention such as "score of [-100]"; group 1 is the (possibly negative) integer.
score_regex = re.compile(r'score.*?\[(-?[\d]+)\]', re.IGNORECASE)

# Header text containing this phrase starts a new release profile.
header_release_profile_regex = re.compile(r'release profile', re.IGNORECASE)

# Ordered (category, pattern) pairs; parse_category returns the category of the
# first pattern found in a line.
category_regex = (
    (TermCategory.Required, re.compile(r'must contain', re.IGNORECASE)),
    (TermCategory.Ignored, re.compile(r'must not contain', re.IGNORECASE)),
    (TermCategory.Preferred, re.compile(r'preferred', re.IGNORECASE)),
)
|
||
|
|
||
|
class ParserState:
    """Mutable bookkeeping carried from line to line while parsing a guide page."""

    def __init__(self):
        # Fence tracking: 0 outside a fenced code block, 1 inside one.
        self.bracket_depth = 0
        # Header depth used to detect non-nested headers; -1 means none recorded.
        self.current_header_depth = -1
        # Text of the header that introduced the current profile, if any.
        self.profile_name = None
        # Category applied to terms found in the next fenced block.
        self.current_category = TermCategory.Preferred
        # Score most recently seen for preferred terms, if any.
        self.score = None

    def reset(self):
        """Return every field to its initial value."""
        self.__init__()

    def is_valid(self):
        """Tell whether enough context is known to capture terms.

        A profile name and a category are always required; preferred terms
        additionally need a score.
        """
        if self.profile_name is None or self.current_category is None:
            return False
        return self.current_category != TermCategory.Preferred or self.score is not None
|
||
|
|
||
|
# --------------------------------------------------------------------------------------------------
|
||
|
def get_markdown(page, timeout=30):
    """Download the raw markdown source of a Sonarr V3 guide page.

    Args:
        page: Page name (without the ``.md`` suffix) under ``docs/Sonarr/V3``
            of the TRaSH-/Guides repository.
        timeout: Seconds to wait for the server before giving up; forwarded to
            ``requests.get``.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(
        f'https://raw.githubusercontent.com/TRaSH-/Guides/master/docs/Sonarr/V3/{page}.md',
        timeout=timeout)
    # Fail loudly on a missing page instead of handing an error body to the parser.
    response.raise_for_status()
    return response.content.decode('utf8')
|
||
|
|
||
|
# --------------------------------------------------------------------------------------------------
|
||
|
def parse_category(line):
    """Return the category whose pattern first matches ``line``, or None.

    Patterns are tried in the order they appear in ``category_regex``.
    """
    return next(
        (category for category, pattern in category_regex if pattern.search(line)),
        None,
    )
|
||
|
|
||
|
# --------------------------------------------------------------------------------------------------
|
||
|
def parse_markdown_outside_fence(args, logger, line, state, results):
    """Process one markdown line that lies outside a fenced code block.

    Updates ``state`` with the profile name, header depth, term category and
    score found on the line, and may set ``include_preferred_when_renaming``
    on the current profile.

    Args:
        args: Not read by this function.
        logger: Object whose ``debug`` method receives trace messages.
        line: The markdown line to inspect.
        state: Parser state; its ``profile_name``, ``current_header_depth``,
            ``current_category`` and ``score`` are read and/or written, and
            ``reset()`` may be called.
        results: Container indexed by profile name to obtain the profile
            object for the current profile.
    """
    # Header processing
    if match := header_regex.search(line):
        header_depth = len(match.group(1))
        header_text = match.group(2)
        logger.debug(f'> Parsing Header [Text: {header_text}] [Depth: {header_depth}]')

        # Profile name (always reset previous state here)
        if header_release_profile_regex.search(header_text):
            state.reset()
            state.profile_name = header_text
            # Record the profile header's depth. Without this the depth stays at
            # its initial -1 and the non-nested header check below never fires.
            state.current_header_depth = header_depth
            logger.debug(f' - New Profile [Text: {header_text}]')
            return

        # A header at the same or a shallower depth than the profile header ends
        # the profile's section.
        if header_depth <= state.current_header_depth:
            logger.debug(' - !! Non-nested, non-profile header found; resetting all state')
            state.reset()
            return

    # Until we find a header that defines a profile, we don't care about anything under it.
    if not state.profile_name:
        return

    # Check if we are enabling the "Include Preferred when Renaming" checkbox
    profile = results[state.profile_name]
    lower_line = line.lower()
    if 'include preferred' in lower_line:
        # The option is treated as enabled unless the sentence contains "not".
        profile.include_preferred_when_renaming = 'not' not in lower_line
        logger.debug(f' - "Include Preferred" found [Value: {profile.include_preferred_when_renaming}] [Line: {line}]')
        return

    # Either we have a nested header or normal line at this point
    # We need to check if we're defining a new category.
    if category := parse_category(line):
        state.current_category = category
        logger.debug(f' - Category Set [Name: {category}] [Line: {line}]')
        # Deliberately no return here: the category and score are sometimes in
        # the same sentence (line), so keep processing the line.

    # Check this line for a score value. We do this even if our category may not be set to 'Preferred' yet.
    if match := score_regex.search(line):
        state.score = int(match.group(1))
        logger.debug(f' - Score [Value: {state.score}]')
        return
|
||
|
|
||
|
# --------------------------------------------------------------------------------------------------
|
||
|
def parse_markdown_inside_fence(args, logger, line, state, results):
    """Capture one line found inside a fenced code block as a profile term.

    The term is filed on the current profile according to
    ``state.current_category``. Blank lines are ignored.

    Args:
        args: Options object; ``strict_negative_scores`` is read for preferred
            terms.
        logger: Object whose ``debug`` method receives trace messages.
        line: The line inside the fence (the candidate term).
        state: Parser state; ``profile_name``, ``current_category`` and
            ``score`` are read. For preferred terms ``score`` must be a number
            (parse_markdown checks ``state.is_valid()`` before calling this).
        results: Container indexed by profile name to obtain the profile
            object whose ``preferred``, ``required`` and ``ignored``
            collections are appended to.
    """
    # A blank line carries no term; storing it would register an empty
    # pattern on the profile.
    if not line.strip():
        return

    profile = results[state.profile_name]

    if state.current_category == TermCategory.Preferred:
        logger.debug(' + Capture Term '
                     f'[Category: {state.current_category}] '
                     f'[Score: {state.score}] '
                     f'[Strict: {args.strict_negative_scores}] '
                     f'[Term: {line}]')

        # In strict mode a negatively scored preferred term is filed as ignored.
        if args.strict_negative_scores and state.score < 0:
            profile.ignored.append(line)
        else:
            profile.preferred[state.score].append(line)
        return

    # Sometimes a comma is present at the end of these regexes, because when it's
    # pasted into Sonarr it acts as a delimiter. However, when using them with the
    # API we do not need them.
    line = line.rstrip(',')

    if state.current_category == TermCategory.Ignored:
        profile.ignored.append(line)
        logger.debug(f' + Capture Term [Category: {state.current_category}] [Term: {line}]')
        return

    if state.current_category == TermCategory.Required:
        profile.required.append(line)
        logger.debug(f' + Capture Term [Category: {state.current_category}] [Term: {line}]')
        return
|
||
|
|
||
|
# --------------------------------------------------------------------------------------------------
|
||
|
def parse_markdown(args, logger, markdown_content):
    """Parse a guide page's markdown into per-profile data.

    Args:
        args: Options object forwarded to the per-line parsers.
        logger: Object whose ``debug`` method receives trace messages.
        markdown_content: Full markdown text of the page.

    Returns:
        A ``defaultdict`` of ``ProfileData`` keyed by profile name.
    """
    results = defaultdict(ProfileData)
    state = ParserState()

    for line in markdown_content.splitlines():
        # A fence marker flips us between "outside" (0) and "inside" (1); the
        # marker line itself is never parsed.
        if line.startswith('```'):
            state.bracket_depth = 1 - state.bracket_depth
            continue

        # Outside a fenced block: headers, categories, scores, options.
        if state.bracket_depth == 0:
            parse_markdown_outside_fence(args, logger, line, state, results)
            continue

        if state.bracket_depth != 1:
            continue

        # Inside a fenced block: each line is a term, provided we know where it belongs.
        if state.is_valid():
            parse_markdown_inside_fence(args, logger, line, state, results)
            continue

        logger.debug(' - !! Inside bracket with invalid state; skipping! '
                     f'[Profile Name: {state.profile_name}] '
                     f'[Category: {state.current_category}] '
                     f'[Score: {state.score}] '
                     f'[Line: {line}] '
                     )

    logger.debug('\n')
    return results
|