Skip to content

Query parser

hypha.apply.search.query_parser

parser module-attribute

# Module-level Lark parser for the search query grammar, built with the LALR
# algorithm and with a QueryTransformer instance passed as `transformer`.
# Grammar summary: an expression is any sequence of filter expressions
# (`key:value`, `key:"quoted value"`, or `#<number>`) and bare search terms;
# whitespace is ignored between tokens.
# NOTE(review): the `filer_value` rule is not referenced by any other rule in
# this grammar; it looks like a typo for `filter_value` -- confirm before
# removing it or wiring it into `filter_expression`.
parser = Lark('\n    ?start: expression\n\n    expression: (filter_expression | search_term)*\n    filter_expression: string FILTER_COLON string\n                    | string FILTER_COLON ESCAPED_STRING\n                    | FILTER_HASH NUMBER\n    search_term: string\n    filer_value: string\n            | ESCAPED_STRING\n\n    string: /[^:#\\s]+/\n\n    FILTER_COLON: ":"\n    FILTER_HASH: "#"\n\n    %import common.NUMBER\n    %import common.ESCAPED_STRING\n    %ignore /\\s+/\n', start='start', parser='lalr', transformer=QueryTransformer())

QueryTransformer

Bases: Transformer

expression

expression(items)
Source code in hypha/apply/search/query_parser.py
def expression(self, items):
    """Merge parsed items into a filters mapping and a free-text string.

    Dict items are treated as filters: every value is appended to the list
    kept under its key, so repeated keys accumulate in encounter order.
    Any other item is stringified and the results are joined with single
    spaces to form the text.

    Returns a dict with the keys ``"filters"`` and ``"text"``.
    """
    grouped = {}
    words = []
    for entry in items:
        if not isinstance(entry, dict):
            words.append(str(entry))
            continue
        for name, val in entry.items():
            grouped.setdefault(name, []).append(val)
    return {"filters": grouped, "text": " ".join(words)}

filter_expression

filter_expression(items)
Source code in hypha/apply/search/query_parser.py
def filter_expression(self, items):
    """Build a single-entry filter dict from the matched children.

    With exactly three children, the first is used as the key and the third
    as the value (the middle child is ignored). With any other count, the
    second child is returned under the ``"id"`` key.
    """
    if len(items) != 3:
        return {"id": items[1]}
    name, _, val = items
    return {name: val}

search_term

search_term(items)
Source code in hypha/apply/search/query_parser.py
def search_term(self, items):
    """Return the first child of the matched rule unchanged."""
    term = items[0]
    return term

string

string(s)
Source code in hypha/apply/search/query_parser.py
def string(self, s):
    """Return the ``value`` attribute of the single child contained in ``s``."""
    (token,) = s
    return token.value

NUMBER

NUMBER(s)
Source code in hypha/apply/search/query_parser.py
def NUMBER(self, s):
    """Convert the token's ``value`` to an ``int``."""
    digits = s.value
    return int(digits)

ESCAPED_STRING

ESCAPED_STRING(s)
Source code in hypha/apply/search/query_parser.py
def ESCAPED_STRING(self, s):
    """Return the token's ``value`` without its first and last characters."""
    raw = s.value
    return raw[1:-1]

tokenize_date_filter_value

tokenize_date_filter_value(date_str)

Convert a date filter string into a list of tokens.

Format: `[operator]YYYY[-MM[-DD]]`. The returned tokens are, in order: the operator (`>=`, `<=`, `>` or `<`; `None` when no operator is given), the year, the month (if present), and the day (if present).

Source code in hypha/apply/search/query_parser.py
def tokenize_date_filter_value(date_str: str) -> list:
    """Convert a date filter string into a list of tokens.

    Format: [operator][year][-[month]-[day]]
    The tokens are:
    - The operator (>=, <=, >, <) (if present)
    - The year
    - The month (if present)
    - The day (if present)
    """
    # Define the regex pattern
    regex_pattern = r"^(<=|>=|<|>)?(\d{4}(?:-\d{2}(?:-\d{2})?)?(?:-\d{2})?)$"

    # Match the regex pattern to the value
    match = re.match(regex_pattern, date_str)

    # Extract the operator and date from the match object
    operator = match.group(1)
    date_str = match.group(2)

    # Convert date_str to a datetime object
    match len(date_str):
        case 4:
            # Date string is only a year
            return [operator, int(date_str)]
        case 7:
            # Date string is in the format YYYY-MM
            try:
                date = dt.datetime.strptime(date_str, "%Y-%m")
                return [operator, date.year, date.month]
            except ValueError:
                return []
        case _:
            try:
                date = dt.datetime.strptime(date_str, "%Y-%m-%d")
                return [operator, date.year, date.month, date.day]
            except ValueError:
                return []

parse_search_query

parse_search_query(search_query)

Parses a Gmail-like search query string into a dictionary of filters and the remaining text. Example: "from:johndoe@example.com to:janedoe@example.com subject:hello world #12" would be parsed into: { "filters": { "from": ["johndoe@example.com"], "to": ["janedoe@example.com"], "subject": ["hello"], "id": [12] }, "text": "world" }. A filter value ends at the first whitespace; quote it to include spaces, e.g. subject:"hello world".

Source code in hypha/apply/search/query_parser.py
def parse_search_query(search_query: str) -> dict:
    """
    Parse a Gmail-like search query string into its filters and the
    remaining free text, using the module-level ``parser``.

    A filter is written ``key:value``, ``key:"quoted value"`` or ``#<number>``
    (stored under ``"id"`` as an int). An unquoted value ends at the first
    whitespace; every other word becomes part of the text.

    Example: "from:johndoe@example.com to:janedoe@example.com subject:hello world #12"
    is parsed into:
    {
        "filters": {
            "from": ["johndoe@example.com"],
            "to": ["janedoe@example.com"],
            "subject": ["hello"],
            "id": [12]
        },
        "text": "world"
    }
    """
    parsed = parser.parse(search_query)
    return parsed