# angrybeanie_wagtail/env/lib/python3.12/site-packages/wagtail/search/utils.py

import operator
import re
from functools import partial

from django.apps import apps
from django.db import connections
from django.http import QueryDict

from wagtail.search.index import RelatedFields, SearchField

from .query import MATCH_NONE, Phrase, PlainText

NOT_SET = object()


def balanced_reduce(operator, seq, initializer=NOT_SET):
    """
    Has the same result as Python's reduce function, but performs the calculations in a different order.

    This is important when the operator is constructing data structures such as search query classes.
    This method will make the resulting data structures flatter, so operations that need to traverse
    them don't end up crashing with recursion errors.

    For example:

    Python's builtin reduce() function will do the following calculation:

    reduce(add, [1, 2, 3, 4, 5, 6, 7, 8])
    (1 + (2 + (3 + (4 + (5 + (6 + (7 + 8)))))))

    When using this with query classes, it would create a large data structure with a depth of 7

    Whereas balanced_reduce will execute this like so:

    balanced_reduce(add, [1, 2, 3, 4, 5, 6, 7, 8])
    ((1 + 2) + (3 + 4)) + ((5 + 6) + (7 + 8))

    Which only has a depth of 2
    """
    # Casting all iterables to list makes the implementation simpler
    if not isinstance(seq, list):
        seq = list(seq)

    # Note, it needs to be possible to use None as an initial value
    if initializer is not NOT_SET:
        if len(seq) == 0:
            return initializer
        else:
            return operator(initializer, balanced_reduce(operator, seq))

    if len(seq) == 0:
        raise TypeError("reduce() of empty sequence with no initial value")
    elif len(seq) == 1:
        return seq[0]
    else:
        break_point = len(seq) // 2
        first_set = balanced_reduce(operator, seq[:break_point])
        second_set = balanced_reduce(operator, seq[break_point:])
        return operator(first_set, second_set)
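

# Illustrative usage sketch (not part of the upstream module): the result is
# the same as functools.reduce, only the grouping of the calls differs.
#
#   balanced_reduce(operator.add, [1, 2, 3, 4, 5, 6, 7, 8])  # -> 36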


# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...)
OR = partial(balanced_reduce, operator.or_)

# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...)
AND = partial(balanced_reduce, operator.and_)

# Reduce any iterable to a single value using an addition
ADD = partial(balanced_reduce, operator.add)

# Reduce any iterable to a single value using a multiplication
MUL = partial(balanced_reduce, operator.mul)
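
# Illustrative sketch (not part of the upstream module): combining query
# objects with these reducers keeps the resulting tree shallow.
#
#   terms = [PlainText(word) for word in "alpha beta gamma delta".split()]
#   OR(terms)   # roughly ((alpha | beta) | (gamma | delta))
#   AND(terms)  # roughly ((alpha & beta) & (gamma & delta))
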
MAX_QUERY_STRING_LENGTH = 255
filters_regexp = re.compile(r'\b(\w+):(\w+|"[^"]+"|\'[^\']+\')')


def normalise_query_string(query_string):
    # Truncate query string
    query_string = query_string[:MAX_QUERY_STRING_LENGTH]
    # Convert query_string to lowercase
    query_string = query_string.lower()
    # Remove leading, trailing and multiple spaces
    query_string = re.sub(" +", " ", query_string).strip()

    return query_string
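

# Illustrative sketch (not part of the upstream module):
#
#   normalise_query_string("  Hello   WORLD  ")  # -> "hello world"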


def separate_filters_from_query(query_string):
    filters = QueryDict(mutable=True)

    for match_object in filters_regexp.finditer(query_string):
        key, value = match_object.groups()
        filters.update({key: value.strip("\"'")})

    query_string = filters_regexp.sub("", query_string).strip()

    return filters, query_string
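

# Illustrative sketch (not part of the upstream module):
#
#   filters, remainder = separate_filters_from_query('hello live:true')
#   # filters   -> QueryDict containing {'live': 'true'}
#   # remainder -> 'hello'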


def parse_query_string(query_string, operator=None, zero_terms=MATCH_NONE):
    """
    This takes a query string typed in by a user and extracts the following:

     - Quoted terms (for phrase search)
     - Filters

    For example, the following query:

    `hello "this is a phrase" live:true` would be parsed into:

    filters: {'live': 'true'}
    tokens: And([PlainText('hello'), Phrase('this is a phrase')])
    """
    filters, query_string = separate_filters_from_query(query_string)

    is_phrase = False
    tokens = []

    if '"' in query_string:
        parts = query_string.split('"')
    else:
        parts = query_string.split("'")

    for part in parts:
        part = part.strip()
        if part:
            if is_phrase:
                tokens.append(Phrase(part))
            else:
                tokens.append(
                    PlainText(part, operator=operator or PlainText.DEFAULT_OPERATOR)
                )

        is_phrase = not is_phrase

    if tokens:
        if operator == "or":
            search_query = OR(tokens)
        else:
            search_query = AND(tokens)
    else:
        search_query = zero_terms

    return filters, search_query
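

# Illustrative sketch (not part of the upstream module), matching the example
# in the docstring above:
#
#   filters, query = parse_query_string('hello "this is a phrase" live:true')
#   # filters -> QueryDict containing {'live': 'true'}
#   # query   -> roughly And([PlainText('hello'), Phrase('this is a phrase')])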


def get_descendant_models(model):
    """
    Returns all descendants of a model, including the model itself.
    """
    descendant_models = {
        other_model
        for other_model in apps.get_models()
        if issubclass(other_model, model)
    }
    descendant_models.add(model)
    return descendant_models
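

# Illustrative sketch (hypothetical page models; assumes wagtail.models.Page
# and project-specific subclasses such as HomePage are registered):
#
#   get_descendant_models(Page)  # -> {Page, HomePage, ...}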


def get_content_type_pk(model):
    # We import it locally because this file is loaded before apps are ready.
    from django.contrib.contenttypes.models import ContentType

    return ContentType.objects.get_for_model(model).pk


def get_ancestors_content_types_pks(model):
    """
    Returns content types ids for the ancestors of this model, excluding it.
    """
    from django.contrib.contenttypes.models import ContentType

    return [
        ct.pk
        for ct in ContentType.objects.get_for_models(
            *model._meta.get_parent_list()
        ).values()
    ]


def get_descendants_content_types_pks(model):
    """
    Returns content types ids for the descendants of this model, including it.
    """
    from django.contrib.contenttypes.models import ContentType

    return [
        ct.pk
        for ct in ContentType.objects.get_for_models(
            *get_descendant_models(model)
        ).values()
    ]
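

# Illustrative sketch (hypothetical models; assumes BlogPage subclasses Page):
#
#   get_ancestors_content_types_pks(BlogPage)  # -> ContentType pks of Page, etc.
#   get_descendants_content_types_pks(Page)    # -> pks for Page and every subclass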


def get_search_fields(search_fields):
    for search_field in search_fields:
        if isinstance(search_field, SearchField):
            yield search_field
        elif isinstance(search_field, RelatedFields):
            yield from get_search_fields(search_field.fields)
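

# Illustrative sketch (hypothetical field list): RelatedFields entries are
# flattened so that only SearchField instances are yielded.
#
#   fields = [SearchField("title"), RelatedFields("author", [SearchField("name")])]
#   list(get_search_fields(fields))  # -> [SearchField('title'), SearchField('name')]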


def get_postgresql_connections():
    return [
        connection
        for connection in connections.all()
        if connection.vendor == "postgresql"
    ]