Skip to content

Schema

Source code in scrape_schema/base.py
class BaseSchema(metaclass=SchemaMeta):
    """Main schema class.

    An instance is built from markup: the markup is wrapped in a parsel
    selector, optional pre-validators are run, then every declared field is
    parsed and stored as an instance attribute.

    Attributes:
        __schema_fields__: Dict[str, BaseField] access to fields object by key in current schema
        __schema_annotations__: Dict[str, Type] access to fields annotations in current schema
        __schema_aliases__: Dict[str, str] access to fields aliases in current schema

    """

    # NOTE: the docstring must be the first statement of the class body,
    # otherwise it is not stored in ``__doc__`` and documentation tools attach
    # it to the preceding attribute instead of the class.
    __schema_fields__: Dict[str, BaseField]
    __schema_annotations__: Dict[str, Type]
    __schema_aliases__: Dict[str, str]

    class Config(SchemaConfig):
        """Configuration read by this schema (``selector_kwargs``, ``type_caster``)."""

        pass

    @property
    def __sc_params__(self) -> Dict[str, Any]:
        """Magic property for access to all @sc_param decorated properties

        Only attributes defined directly on the instance's class are inspected.

        Returns:
            dict with all @sc_param decorated properties

        """
        return {
            k: v for k, v in self.__class__.__dict__.items() if isinstance(v, sc_param)
        }

    @property
    def __selector__(self) -> Union[Selector, SelectorList]:
        """Get available cached Parsel.Selector or SelectorList object

        Returns:
            Parsel SelectorType object
        """
        return self._cached_parser

    def __init__(self, markup: Union[str, bytes, Selector, SelectorList]):
        """Create a new object by parsing fields from markup.

        Args:
            markup: string, bytes or parsel.Selector object
        Raises:
            TypeError: if markup is not string, bytes or Selector objects
            SchemaPreValidationError: if a markup pre-validator method fails
        """
        # declarations only; both attributes are assigned in __init_markup
        self._cached_parser: Union[Selector, SelectorList]
        self._markup: str

        # order matters: validators and fields both read the cached selector
        self.__init_markup(markup)
        self.__pre_validate_markup()
        self.__init_fields()

    def __init_markup(self, markup: Union[str, bytes, Selector, SelectorList]):
        """Store the raw markup and the selector built from (or given as) it.

        Args:
            markup: string, bytes or parsel.Selector/SelectorList object
        Raises:
            TypeError: if markup is not string, bytes or Selector objects
        """
        if isinstance(markup, str):
            self._markup = markup
            self._cached_parser = Selector(markup, **self.Config.selector_kwargs)
        elif isinstance(markup, bytes):
            self._cached_parser = Selector(body=markup, **self.Config.selector_kwargs)
            self._markup = markup.decode()
        elif isinstance(markup, (Selector, SelectorList)):
            # an already built selector is reused as is
            self._markup = markup.get()  # type: ignore
            self._cached_parser = markup
        else:
            raise TypeError(
                f"Markup support only str, bytes or Selector types, not {type(markup).__name__}"
            )

    def __pre_validate_markup(self):
        """Run every @markup_pre_validator decorated method of this class.

        Raises:
            SchemaPreValidationError: if a validator method returns a falsy value
        """
        # find @markup_pre_validator decorated methods and validate
        for k, v in self.__class__.__dict__.items():
            if isinstance(v, (BaseField, sc_param)):
                continue
            attrs = getattr(v, "__dict__", None)
            if not attrs:
                continue
            pre_validator = attrs.get("__wrapped__")
            # ``__wrapped__`` is a plain function for ordinary functools.wraps
            # decorators; issubclass() raises TypeError on non-classes, so only
            # classes are checked.
            if not (
                isinstance(pre_validator, type)
                and issubclass(pre_validator, markup_pre_validator)
            ):
                continue
            if not getattr(self, k)():
                msg = f"Validation error in {self.__schema_name__}.{k} method"
                raise SchemaPreValidationError(msg)

    def __init_fields(self) -> None:
        """Parse fields entrypoint.

        Automatically called in the `__init__` constructor
        """
        _logger.info(
            "[%s] Start parse fields count: %s",
            self.__schema_name__,
            len(self.__schema_fields__.keys()),
        )
        for name, field in self.__schema_fields__.items():
            field_type = self.__schema_annotations__[name]
            _logger.debug("Start parse attribute: `%s.%s`", self.__schema_name__, name)
            # fields flagged as nested receive the annotation before parsing
            if getattr(field, "__I_AM_NESTED_FIELD__", False):
                field.type_ = field_type  # type: ignore
            value = field.sc_parse(self.__selector__)
            # default values are stored as is, without type casting
            if self.Config.type_caster and field.auto_type and not field.is_default:
                value = self.Config.type_caster.cast(field_type, value)
            if not field._is_success and not field.is_default:
                _logger.error("Parse error in %s.%s field", self.__schema_name__, name)

            # disable default value flag
            if field.is_default:
                _logger.error(
                    "`%s.%s` failed parse in %r method, set default value",
                    self.__schema_name__,
                    name,
                    field._last_failed_method,
                )  # type: ignore
                field.is_default = False

            _logger.info("%s.%s = %s", self.__schema_name__, name, value)
            setattr(self, name, value)

    @property
    def __raw__(self) -> str:
        """Get raw string markup value

        Returns:
            markup string object
        """
        return self._markup

    @staticmethod
    def _to_dict(
        value: Union["BaseSchema", List, Dict, Any]
    ) -> Union[List[Dict[str, Any]], Dict[str, Any], Any]:
        """convert BaseSchema objects to built-in python objects like dict, list

        A schema becomes a dict and a list made only of schemas becomes a list
        of dicts; any other value is returned unchanged.
        """
        if isinstance(value, BaseSchema):
            return value.dict()

        elif isinstance(value, list):
            if all(isinstance(val, BaseSchema) for val in value):  # pragma: no cover
                return [val.dict() for val in value]
        return value

    def dict(self, *, by_alias: bool = True) -> Dict[str, Any]:
        """Convert schema object to python dict.

        Args:
            by_alias: bool set key by field alias. Default True. When False,
                original attribute names are used for fields and for
                sc_param properties alike
        Returns:
            dictionary with all public fields and sc_param properties
        """
        # an empty alias map makes every ``.get(name, name)`` return the name
        aliases: Dict[str, str] = self.__schema_aliases__ if by_alias else {}
        result: Dict[str, Any] = {  # type: ignore
            aliases.get(k, k): self._to_dict(getattr(self, k))
            for k in self.__sc_params__
        }
        # parse public field keys
        for k, v in self.__dict__.items():
            if not k.startswith("_") and self.__schema_fields__.get(k):
                result[aliases.get(k, k)] = self._to_dict(v)
        return result

    def __repr__(self):
        return f'{self.__schema_name__}({", ".join(self.__repr_args__())})'

    def __repr_args__(self) -> List[str]:
        """Build the ``name:type=value`` parts used by ``__repr__``.

        Aliased fields are rendered as ``alias(name)``; nested schema values
        are rendered without the type prefix.
        """
        args: Dict[str, Any] = {  # type: ignore
            k: getattr(self, k) for k in self.__sc_params__
        }
        # parse public field keys
        for k, v in self.__dict__.items():
            if not k.startswith("_") and self.__schema_fields__.get(k):  # type: ignore
                if alias := self.__schema_aliases__.get(k):  # type: ignore
                    args[f"{alias}({k})"] = v
                else:
                    args[k] = v

        return [
            f"{k}={repr(v)}"
            if isinstance(v, BaseSchema)
            else f"{k}:{type(v).__name__}={repr(v)}"
            for k, v in args.items()
        ]

    @property
    def __schema_name__(self) -> str:
        """Get the name of the schema class

        Returns:
            class name
        """
        return self.__class__.__name__

__raw__ property

Get the raw markup string value

Returns:

Type Description
str

markup string object

__sc_params__ property

Magic property for accessing all @sc_param-decorated properties

Returns:

Type Description
Dict[str, Any]

dict with all @sc_param decorated properties

__schema_aliases__ instance-attribute

Main schema class

Attributes:

Name Type Description
__schema_fields__

Dict[str, BaseField] access to fields object by key in current schema

__schema_annotations__

Dict[str, Type] access to fields annotations in current schema

__schema_aliases__

Dict[str, str] access to fields aliases in current schema

__schema_name__ property

Returns:

Type Description
str

class name

__selector__ property

Get available cached Parsel.Selector or SelectorList object

Returns:

Type Description
Union[Selector, SelectorList]

Parsel SelectorType object

__init__(markup)

Create a new object by parsing fields from markup.

Parameters:

Name Type Description Default
markup Union[str, bytes, Selector, SelectorList]

string, bytes or parsel.Selector object

required

Raises: TypeError: if markup is not string, bytes or Selector objects

Source code in scrape_schema/base.py
def __init__(self, markup: Union[str, bytes, Selector, SelectorList]):
    """Build the schema instance by parsing its fields out of ``markup``.

    Args:
        markup: document to parse, given as ``str``, ``bytes`` or a parsel
            ``Selector``/``SelectorList``
    Raises:
        TypeError: when ``markup`` is none of the supported types
    """
    # Attribute declarations only (no value is bound here).
    self._markup: str
    self._cached_parser: Union[Selector, SelectorList]

    # 1. store the markup and the selector
    self.__init_markup(markup)
    # 2. run the markup pre-validators
    self.__pre_validate_markup()
    # 3. parse and assign every field
    self.__init_fields()

__init_fields()

Parse fields entrypoint.

Automatically called in the __init__ constructor

Source code in scrape_schema/base.py
def __init_fields(self) -> None:
    """Parse every declared field and store the result on the instance.

    Invoked from the `__init__` constructor.
    """
    declared = self.__schema_fields__
    _logger.info(
        "[%s] Start parse fields count: %s", self.__schema_name__, len(declared)
    )
    for attr_name, parser in declared.items():
        annotation = self.__schema_annotations__[attr_name]
        _logger.debug(
            "Start parse attribute: `%s.%s`", self.__schema_name__, attr_name
        )

        # fields flagged as nested receive the annotation before parsing
        if getattr(parser, "__I_AM_NESTED_FIELD__", False):
            parser.type_ = annotation  # type: ignore

        parsed = parser.sc_parse(self.__selector__)

        # cast only real parse results; default values are stored untouched
        caster = self.Config.type_caster
        if caster and parser.auto_type and not parser.is_default:
            parsed = caster.cast(annotation, parsed)

        if parser.is_default:
            _logger.error(
                "`%s.%s` failed parse in %r method, set default value",
                self.__schema_name__,
                attr_name,
                parser._last_failed_method,
            )  # type: ignore
            # reset the flag on the field object
            parser.is_default = False
        elif not parser._is_success:
            _logger.error(
                "Parse error in %s.%s field", self.__schema_name__, attr_name
            )

        _logger.info("%s.%s = %s", self.__schema_name__, attr_name, parsed)
        setattr(self, attr_name, parsed)

dict(*, by_alias=True)

Convert the schema object to a Python dict. If a field has an alias, the alias is used as the key. Args: by_alias: bool, use field aliases as keys (default True). Returns: a dictionary with all public fields and sc_param properties.

Source code in scrape_schema/base.py
def dict(self, *, by_alias: bool = True) -> Dict[str, Any]:
    """Convert schema object to python dict.

    Args:
        by_alias: bool set key by field alias. Default True. When False,
            original attribute names are used for fields and for sc_param
            properties alike
    Returns:
        dictionary with all public fields and sc_param properties
    """
    # an empty alias map makes every ``.get(name, name)`` return the name,
    # so ``by_alias=False`` is honoured for sc_param keys as well as fields
    aliases: Dict[str, str] = self.__schema_aliases__ if by_alias else {}
    result: Dict[str, Any] = {  # type: ignore
        aliases.get(k, k): self._to_dict(getattr(self, k))
        for k in self.__sc_params__
    }
    # parse public field keys
    for k, v in self.__dict__.items():
        if not k.startswith("_") and self.__schema_fields__.get(k):
            result[aliases.get(k, k)] = self._to_dict(v)
    return result

BaseSchema configuration

Attributes:

Name Type Description
selector_kwargs Dict[str, Any]

default kwargs for parsel.Selector class

type_caster Optional[TypeCaster]

type_caster module

Source code in scrape_schema/base.py
class SchemaConfig:
    """Configuration container read by BaseSchema through its ``Config`` class.

    Attributes:
        selector_kwargs: extra keyword arguments passed to parsel.Selector
            when a schema is built from str or bytes markup
        type_caster: object whose ``cast`` method converts parsed values to
            the annotated field type; a falsy value disables casting
    """

    # Both values are class-level, so they are shared by every schema that
    # does not override them.
    selector_kwargs: Dict[str, Any] = dict()  # no extra Selector kwargs by default
    type_caster: Optional[TypeCaster] = TypeCaster()  # default caster instance