Skip to content

Pre validation

Validation

You can check the markup argument before the schema is initialized. If the check fails, a SchemaPreValidationError exception is raised.

from scrape_schema import BaseSchema, Parsel
from scrape_schema.validator import markup_pre_validator


class Schema(BaseSchema):
    """Example schema whose markup is checked by a hand-written rule."""

    text: str = Parsel().css("h1::text").get()

    @markup_pre_validator()
    def validate(self) -> bool:
        """Return True only when the first <h1> text is 'Hello, Parsel!'."""
        # manual pre-validate rule
        expected_heading = "Hello, Parsel!"
        actual_heading = self.__selector__.xpath("//h1/text()").get()
        return actual_heading == expected_heading



# The <h1> text equals "Hello, Parsel!", so the manual rule returns True.
Schema("<h1>Hello, Parsel!</h1>")  # OK
# The <h1> text differs, so the manual rule returns False and construction fails.
Schema("<h1>spam egg</h1>")  # raises SchemaPreValidationError

The decorator also provides shortcuts for quickly pre-validating a document:

from scrape_schema import BaseSchema, Parsel
from scrape_schema.validator import markup_pre_validator
from scrape_schema.exceptions import SchemaPreValidationError


class ValidateSchemaXpath(BaseSchema):
    """Example schema pre-validated with the decorator's ``xpath`` shortcut."""

    # Text of the <title> element, extracted with an XPath query.
    title: str = Parsel().xpath("//title/text()").get()

    # Shortcut form: the decorator receives the XPath "//title". In the usage
    # example in this file, markup without a <title> element raises
    # SchemaPreValidationError.
    @markup_pre_validator(xpath="//title")
    def validate(self) -> bool:
        # No extra manual rule is applied here.
        # NOTE(review): presumably the shortcut check alone decides the
        # outcome when this returns True -- confirm against the library docs.
        return True


class ValidateSchemaCss(BaseSchema):
    """Example schema pre-validated with the decorator's ``css`` shortcut."""

    # Text of the <title> element. The field itself is still extracted with
    # XPath; only the pre-validation below uses a CSS selector.
    title: str = Parsel().xpath("//title/text()").get()

    # Shortcut form: the decorator receives the CSS selector "title". In the
    # usage example in this file, markup without a <title> element raises
    # SchemaPreValidationError.
    @markup_pre_validator(css="title")
    def validate(self) -> bool:
        # No extra manual rule is applied here.
        # NOTE(review): presumably the shortcut check alone decides the
        # outcome when this returns True -- confirm against the library docs.
        return True


class ValidateSchemaRe(BaseSchema):
    """Example schema pre-validated with the decorator's ``pattern`` shortcut."""

    # Text of the <title> element, extracted with an XPath query.
    title: str = Parsel().xpath("//title/text()").get()

    # Shortcut form: the decorator receives a regular expression for a
    # <title>...</title> element. In the usage example in this file, markup
    # without such an element raises SchemaPreValidationError.
    @markup_pre_validator(pattern=r"<title>.*?</title>")
    def validate(self) -> bool:
        # No extra manual rule is applied here.
        # NOTE(review): presumably the shortcut check alone decides the
        # outcome when this returns True -- confirm against the library docs.
        return True


if __name__ == '__main__':
    # Demo: run every shortcut-validated schema against markup that contains
    # a <title> element and against markup that does not.
    schema_classes = (ValidateSchemaXpath, ValidateSchemaCss, ValidateSchemaRe)
    text = "<title>hello, scrape-schema!</title>"
    text_2 = "<h1>hello, scrape-schema!</h1>"

    # passed validation
    for schema_cls in schema_classes:
        parsed = schema_cls(text)
        print(parsed.dict())

    # failed
    for schema_cls in schema_classes:
        try:
            schema_cls(text_2)
        except SchemaPreValidationError:
            print("failed")
# {'title': 'hello, scrape-schema!'}
# {'title': 'hello, scrape-schema!'}
# {'title': 'hello, scrape-schema!'}
# failed
# failed
# failed