1
0
Fork 0
mirror of https://gitlab.com/bramw/baserow.git synced 2025-04-12 16:28:06 +00:00

Resolve "Email frontend validation does not match the backend"

This commit is contained in:
Nigel Gott 2021-07-09 13:33:36 +00:00
parent a7174d4d8a
commit 7848b8b457
7 changed files with 339 additions and 121 deletions
backend
requirements
src/baserow/contrib/database
tests/baserow/contrib/database/field
changelog.md
web-frontend
modules/core/utils
test/unit/core/utils

View file

@ -24,3 +24,4 @@ unicodecsv==0.14.1
django-celery-beat==2.2.0
celery-redbeat==2.0.0
service-identity==21.1.0
regex==2021.4.4

View file

@ -1,3 +1,4 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, date
from decimal import Decimal
@ -9,7 +10,7 @@ from dateutil.parser import ParserError
from django.contrib.postgres.fields import JSONField
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from django.core.validators import URLValidator, EmailValidator, RegexValidator
from django.core.validators import URLValidator
from django.db import models
from django.db.models import Case, When, Q, F, Func, Value, CharField
from django.db.models.expressions import RawSQL
@ -57,6 +58,81 @@ from .models import (
PhoneNumberField,
)
from .registries import FieldType, field_type_registry
from baserow.contrib.database.validators import UnicodeRegexValidator
class CharFieldMatchingRegexFieldType(FieldType, ABC):
    """
    Abstract FieldType for fields that are stored as a CharField but only accept
    values matching a regular expression. Subclasses implement the ``regex`` and
    ``max_length`` properties.

    The regex supplied by the subclass is then reused everywhere it is needed:
    - as the validator of the model's char field
    - as the validator of the serializer field
    - to check the values passed to prepare_value_for_db
    - to keep only the matching values (and blank out the rest) when an existing
      column is altered to this field type on PostgreSQL.
    """

    @property
    @abstractmethod
    def regex(self):
        """The pattern string that every non blank value has to match."""

        pass

    @property
    @abstractmethod
    def max_length(self):
        """The max_length handed to both the model and the serializer field."""

        return None

    @property
    def validator(self):
        """A new UnicodeRegexValidator built from ``self.regex`` on every access."""

        return UnicodeRegexValidator(regex_value=self.regex)

    def prepare_value_for_db(self, instance, value):
        """
        Normalises blank input (``""`` or ``None``) to an empty string and runs the
        validator against every other value, returning it unchanged when it passes.
        """

        if not (value == "" or value is None):
            self.validator(value)
            return value
        return ""

    def get_serializer_field(self, instance, **kwargs):
        """Returns an optional, nullable CharField restricted by the regex."""

        regex_validators = [self.validator]
        length_limit = self.max_length
        return serializers.CharField(
            required=False,
            allow_null=True,
            allow_blank=True,
            validators=regex_validators,
            max_length=length_limit,
            **kwargs,
        )

    def get_model_field(self, instance, **kwargs):
        """Returns a nullable model CharField, defaulting to "", restricted by the
        regex."""

        length_limit = self.max_length
        regex_validators = [self.validator]
        return models.CharField(
            default="",
            blank=True,
            null=True,
            max_length=length_limit,
            validators=regex_validators,
            **kwargs,
        )

    def get_alter_column_prepare_new_value(self, connection, from_field, to_field):
        """
        On PostgreSQL returns a statement which keeps the text of values matching
        the regex and replaces everything else with an empty string. Any other
        vendor falls back to the parent implementation.
        """

        if connection.vendor != "postgresql":
            return super().get_alter_column_prepare_new_value(
                connection, from_field, to_field
            )

        # The regex is placed verbatim inside a single quoted SQL literal, so it
        # must not contain a single quote itself.
        return f"""p_in = (
            case
                when p_in::text ~* '{self.regex}'
                then p_in::text
                else ''
            end
            );"""

    def contains_query(self, *args):
        """Delegates to the shared ``contains_filter`` helper."""

        return contains_filter(*args)
class TextFieldType(FieldType):
@ -926,46 +1002,33 @@ class LinkRowFieldType(FieldType):
return [field.link_row_related_field]
class EmailFieldType(CharFieldMatchingRegexFieldType):
    """
    Field type for email addresses. All validation (model field, serializer field,
    prepare_value_for_db and the column conversion) is driven by the single
    permissive regex below through CharFieldMatchingRegexFieldType, so no email
    specific validator or hard coded SQL pattern is defined here.
    """

    type = "email"
    model_class = EmailField

    @property
    def regex(self):
        """
        Returns a highly permissive regex which allows non-valid emails in order to
        keep the regex as simple as possible and also the same across the frontend,
        database and python code.
        """

        # Use a lookahead to validate that the entire string length does not exceed
        # the max length, as we are matching multiple different tokens in the
        # following regex.
        lookahead = rf"(?=^.{{3,{self.max_length}}}$)"
        # See wikipedia for allowed punctuation etc:
        # https://en.wikipedia.org/wiki/Email_address#Local-part
        local_and_domain = r"[-\.\[\]!#$&*+/=?^_`{|}~\w]+"
        return rf"(?i){lookahead}^{local_and_domain}@{local_and_domain}$"

    @property
    def max_length(self):
        # max_length=254 to be compliant with RFCs 3696 and 5321
        return 254

    def random_value(self, instance, fake, cache):
        """Returns a random email address generated by the provided faker."""

        return fake.email()
class FileFieldType(FieldType):
type = "file"
@ -1399,7 +1462,7 @@ class SingleSelectFieldType(FieldType):
)
class PhoneNumberFieldType(FieldType):
class PhoneNumberFieldType(CharFieldMatchingRegexFieldType):
"""
A simple wrapper around a TextField which ensures any entered data is a
simple phone number.
@ -1412,70 +1475,32 @@ class PhoneNumberFieldType(FieldType):
model_class = PhoneNumberField
MAX_PHONE_NUMBER_LENGTH = 100
"""
According to the E.164 (https://en.wikipedia.org/wiki/E.164) standard for
international numbers the max length of an E.164 number without formatting is 15
characters. However we allow users to store formatting characters, spaces and
expect them to be entering numbers not in the E.164 standard but instead a
wide range of local standards which might support longer numbers.
This is why we have picked a very generous 100 character length to support heavily
formatted local numbers.
"""
PHONE_NUMBER_REGEX = rf"^[0-9NnXx,+._*()#=;/ -]{{1,{MAX_PHONE_NUMBER_LENGTH}}}$"
"""
Allow common punctuation used in phone numbers and spaces to allow formatting,
but otherwise don't allow text as the phone number should work as a link on mobile
devices.
Duplicated in the frontend code at, please keep in sync:
web-frontend/modules/core/utils/string.js#isSimplePhoneNumber
"""
@property
def regex(self):
"""
Allow common punctuation used in phone numbers and spaces to allow formatting,
but otherwise don't allow text as the phone number should work as a link on
mobile devices.
Duplicated in the frontend code at, please keep in sync:
web-frontend/modules/core/utils/string.js#isSimplePhoneNumber
"""
simple_phone_number_validator = RegexValidator(regex=PHONE_NUMBER_REGEX)
return rf"^[0-9NnXx,+._*()#=;/ -]{{1,{self.max_length}}}$"
def prepare_value_for_db(self, instance, value):
if value == "" or value is None:
return ""
self.simple_phone_number_validator(value)
@property
def max_length(self):
"""
According to the E.164 (https://en.wikipedia.org/wiki/E.164) standard for
international numbers the max length of an E.164 number without formatting is 15
characters. However we allow users to store formatting characters, spaces and
expect them to be entering numbers not in the E.164 standard but instead a
wide range of local standards which might support longer numbers.
This is why we have picked a very generous 100 character length to support
heavily formatted local numbers.
"""
return value
def get_serializer_field(self, instance, **kwargs):
return serializers.CharField(
required=False,
allow_null=True,
allow_blank=True,
validators=[self.simple_phone_number_validator],
max_length=self.MAX_PHONE_NUMBER_LENGTH,
**kwargs,
)
def get_model_field(self, instance, **kwargs):
return models.CharField(
default="",
blank=True,
null=True,
max_length=self.MAX_PHONE_NUMBER_LENGTH,
validators=[self.simple_phone_number_validator],
**kwargs,
)
return self.MAX_PHONE_NUMBER_LENGTH
def random_value(self, instance, fake, cache):
return fake.phone_number()
def get_alter_column_prepare_new_value(self, connection, from_field, to_field):
if connection.vendor == "postgresql":
return f"""p_in = (
case
when p_in::text ~* '{self.PHONE_NUMBER_REGEX}'
then p_in::text
else ''
end
);"""
return super().get_alter_column_prepare_new_value(
connection, from_field, to_field
)
def contains_query(self, *args):
return contains_filter(*args)

View file

@ -0,0 +1,76 @@
import regex
from django.core.exceptions import ValidationError
from django.utils.deconstruct import deconstructible
from django.utils.functional import SimpleLazyObject
from django.utils.translation import gettext_lazy as _
def _lazy_re_compile(regex_value, flags=0):
    """
    Wrap the compilation of ``regex_value`` in a SimpleLazyObject so the pattern
    is only built when the returned object is first used.
    """

    def _build_pattern():
        if not isinstance(regex_value, str):
            # Anything that is not a string is treated as already compiled, in
            # which case separate flags are not accepted.
            assert not flags, "flags must be empty if regex is passed pre-compiled"
            return regex_value
        return regex.compile(regex_value, flags)

    return SimpleLazyObject(_build_pattern)
@deconstructible
class UnicodeRegexValidator:
    """
    Amazingly the standard python re regex library does not correctly match valid
    unicode word characters https://bugs.python.org/issue12731 ...

    This is a copy of Django's RegexValidator, but using the drop in replacement
    regex library instead of re, which does handle unicode correctly!

    :param regex_value: The pattern, either as a string or pre-compiled. Falls
        back to the class level default when omitted.
    :param message: The message of the raised ValidationError.
    :param code: The code of the raised ValidationError.
    :param inverse_match: When True a value is invalid if it DOES match.
    :param flags: Flags used to compile ``regex_value``; only allowed when
        ``regex_value`` is a string.
    :raises TypeError: If flags are combined with a pre-compiled pattern.
    """

    # Retained from the original class body for backwards compatibility; the
    # validator itself only ever reads ``regex_value``.
    regex = ""
    # Class level default for the pattern. Without it, constructing the validator
    # with no ``regex_value`` argument failed with an AttributeError because
    # ``__init__`` reads ``self.regex_value`` unconditionally.
    regex_value = ""
    message = _("Enter a valid value.")
    code = "invalid"
    inverse_match = False
    flags = 0

    def __init__(
        self, regex_value=None, message=None, code=None, inverse_match=None, flags=None
    ):
        if regex_value is not None:
            self.regex_value = regex_value
        if message is not None:
            self.message = message
        if code is not None:
            self.code = code
        if inverse_match is not None:
            self.inverse_match = inverse_match
        if flags is not None:
            self.flags = flags
        if self.flags and not isinstance(self.regex_value, str):
            raise TypeError(
                "If the flags are set, regex must be a regular expression string."
            )

        # Replace the raw pattern with a lazily compiled one.
        self.regex_value = _lazy_re_compile(self.regex_value, self.flags)

    def __call__(self, value):
        """
        Validate that the input contains (or does *not* contain, if
        inverse_match is True) a match for the regular expression.

        :raises ValidationError: If the value is not valid.
        """

        regex_matches = self.regex_value.search(str(value))
        invalid_input = regex_matches if self.inverse_match else not regex_matches
        if invalid_input:
            raise ValidationError(self.message, code=self.code)

    def __eq__(self, other):
        """
        Two validators are equal when their compiled pattern, flags, message, code
        and inverse_match setting are all equal.
        """

        return (
            isinstance(other, UnicodeRegexValidator)
            and self.regex_value.pattern == other.regex_value.pattern
            and self.regex_value.flags == other.regex_value.flags
            and (self.message == other.message)
            and (self.code == other.code)
            and (self.inverse_match == other.inverse_match)
        )

View file

@ -1,18 +1,18 @@
import pytest
from django.core.exceptions import ValidationError
from django.test.utils import override_settings
from faker import Faker
from django.core.exceptions import ValidationError
from baserow.contrib.database.fields.field_types import PhoneNumberFieldType
from baserow.contrib.database.fields.field_types import (
PhoneNumberFieldType,
)
from baserow.contrib.database.fields.handler import FieldHandler
from baserow.contrib.database.fields.models import (
LongTextField,
URLField,
EmailField,
PhoneNumberField,
)
from baserow.contrib.database.fields.handler import FieldHandler
from baserow.contrib.database.fields.registries import field_type_registry
from baserow.contrib.database.rows.handler import RowHandler
from tests.test_utils import setup_interesting_test_table
@ -192,6 +192,85 @@ def test_url_field_type(data_fixture):
assert len(URLField.objects.all()) == 2
@pytest.mark.django_db
def test_valid_email(data_fixture):
    """
    Checks which values the email field accepts and rejects when creating rows, and
    that converting a text field to an email field keeps the valid values and
    blanks out the invalid ones.
    """

    user = data_fixture.create_user()
    table = data_fixture.create_database_table(user=user)
    data_fixture.create_database_table(user=user, database=table.database)
    field = data_fixture.create_text_field(table=table, order=1, name="name")

    field_handler = FieldHandler()
    row_handler = RowHandler()

    field_handler.create_field(user=user, table=table, type_name="email", name="email")

    model = table.get_model(attribute_names=True)

    invalid_emails = [
        "test@" + "a" * 246 + ".com",
        "@a",
        "a@",
        "not-an-email",
        "bram.test.nl",
        "invalid_email",
        "invalid@invalid@com",
        "\nhello@gmail.com",
        "asdds asdd@gmail.com",
    ]

    for invalid_email in invalid_emails:
        with pytest.raises(ValidationError):
            row_handler.create_row(
                user=user, table=table, values={"email": invalid_email}, model=model
            )

    valid_emails = [
        "test@" + "a" * 245 + ".com",
        "a@a",
        "用户@例子.广告",
        "अजय@डाटा.भारत",
        "квіточка@пошта.укр",
        "χρήστης@παράδειγμα.ελ",
        "Dörte@Sörensen.example.com",
        "коля@пример.рф",
        "bram@localhost",
        "bram@localhost.nl",
        "first_part_underscores_ok@hyphens-ok.com",
        "wierd@[1.1.1.1]",
        "bram.test.test@sub.domain.nl",
        "BRAM.test.test@sub.DOMAIN.nl",
    ]

    for email in valid_emails:
        row_handler.create_row(
            user=user,
            table=table,
            values={"email": email, "name": email},
            model=model,
        )

    # The invalid values can only be stored in the text field for now, they are
    # expected to be cleared once that field is converted below.
    for bad_email in invalid_emails:
        row_handler.create_row(
            user=user,
            table=table,
            values={"email": "", "name": bad_email},
            model=model,
        )

    # Convert the text field to a email field so we can check how the conversion of
    # values went.
    field_handler.update_field(user=user, field=field, new_type_name="email")

    # Fetch the rows once instead of slicing the queryset for every index.
    rows = list(model.objects.all())

    # Valid values survive in both columns, the invalid ones end up blank.
    expected_values = valid_emails + [""] * len(invalid_emails)
    for index, expected in enumerate(expected_values):
        assert rows[index].email == expected
        assert rows[index].name == expected
@pytest.mark.django_db
def test_email_field_type(data_fixture):
user = data_fixture.create_user()
@ -212,16 +291,6 @@ def test_email_field_type(data_fixture):
assert len(EmailField.objects.all()) == 1
model = table.get_model(attribute_names=True)
with pytest.raises(ValidationError):
row_handler.create_row(
user=user, table=table, values={"email": "invalid_email"}, model=model
)
with pytest.raises(ValidationError):
row_handler.create_row(
user=user, table=table, values={"email": "invalid@email"}, model=model
)
row_handler.create_row(
user=user,
table=table,
@ -264,7 +333,6 @@ def test_email_field_type(data_fixture):
},
model=model,
)
row_handler.create_row(user=user, table=table, values={}, model=model)
# Convert the text field to a url field so we can check how the conversion of
# values went.
@ -298,10 +366,6 @@ def test_email_field_type(data_fixture):
assert rows[5].email == ""
assert rows[5].number == ""
assert rows[6].name == ""
assert rows[6].email == ""
assert rows[6].number == ""
field_handler.delete_field(user=user, field=field_2)
assert len(EmailField.objects.all()) == 2

View file

@ -3,6 +3,8 @@
## Unreleased
* Made it possible to list table field meta-data with a token.
* The email field's validation is now consistent and much more permissive allowing most
values which look like email addresses.
* Add trash where deleted apps, groups, tables, fields and rows can be restored
after deletion.
* Fix the create group invite endpoint failing when no message provided.

View file

@ -49,7 +49,28 @@ export const isValidURL = (str) => {
}
/**
 * Checks whether the provided string looks like an email address using a highly
 * permissive pattern: between 3 and 254 characters in total, consisting of a local
 * part and a domain part separated by exactly one `@`.
 *
 * @param {string} str The value to check.
 * @returns {boolean} True when the value matches the pattern.
 */
export const isValidEmail = (str) => {
  // Please keep these regex in sync with the backend
  // See baserow.contrib.database.fields.field_types.EmailFieldType
  // Javascript does not support using \w to match unicode letters like python.
  // Instead we match all unicode letters including ones with modifiers by using the
  // regex \p{L}\p{M}* taken from https://www.regular-expressions.info/unicode.html
  // Unicode Categories section.
  const lookahead = /(?=^.{3,254}$)/
  // The regex property escapes below are supported as of ES 2018.
  const localAndDomain = /([-.[\]!#$&*+/=?^_`{|}~0-9]|\p{L}\p{M}*)+/
  const start = /^/
  const at = /@/
  const end = /$/
  // Only this combined pattern is used. The previous ASCII-only pattern is gone
  // because declaring `pattern` twice in the same scope is a syntax error.
  const pattern = new RegExp(
    lookahead.source +
      start.source +
      localAndDomain.source +
      at.source +
      localAndDomain.source +
      end.source,
    'iu'
  )

  return pattern.test(str)
}

View file

@ -50,12 +50,41 @@ describe('test string utils', () => {
})
test('test isValidEmail', () => {
  // Please keep these lists in sync with the backend test_valid_email test so
  // that both sides accept and reject exactly the same values.
  const invalidEmails = [
    'test@' + 'a'.repeat(246) + '.com',
    '@a',
    'a@',
    'not-an-email',
    'bram.test.nl',
    'invalid_email',
    'invalid@invalid@com',
    '\nhello@gmail.com',
    'asdds asdd@gmail.com',
  ]
  const validEmails = [
    'test@' + 'a'.repeat(245) + '.com',
    'a@a',
    '用户@例子.广告',
    'अजय@डाटा.भारत',
    'квіточка@пошта.укр',
    'χρήστης@παράδειγμα.ελ',
    'Dörte@Sörensen.example.com',
    'коля@пример.рф',
    'bram@localhost',
    'bram@localhost.nl',
    'first_part_underscores_ok@hyphens-ok.com',
    'wierd@[1.1.1.1]',
    'bram.test.test@sub.domain.nl',
    'BRAM.test.test@sub.DOMAIN.nl',
  ]
  // The earlier one-off expectations are dropped: they required
  // 'bram@localhost' to be invalid, which contradicts the list above.
  for (const invalidEmail of invalidEmails) {
    expect(isValidEmail(invalidEmail)).toBe(false)
  }
  for (const validEmail of validEmails) {
    expect(isValidEmail(validEmail)).toBe(true)
  }
})
test('test isSecureURL', () => {