1
0
Fork 0
mirror of https://gitlab.com/bramw/baserow.git synced 2025-04-11 07:51:20 +00:00

Merge branch '997-fix-uploading-files-via-url' into 'develop'

Resolve "Fix uploading files via url by default crashing due to MemoryError"

Closes #997

See merge request 
This commit is contained in:
Nigel Gott 2022-06-08 14:04:01 +00:00
commit db253b1e2a
9 changed files with 91 additions and 41 deletions
backend
requirements
src/baserow
config/settings
core/user_files
tests/baserow
changelog.md
docker-compose.yml
docs/installation

View file

@@ -29,3 +29,4 @@ autopep8==1.5.7
pytest-unordered==0.4.1
debugpy==1.5.1
backports.cached-property==1.0.1
httpretty==1.1.4

View file

@@ -71,6 +71,8 @@ gitpython==3.1.27
# via bandit
gprof2dot==2021.2.21
# via django-silk
httpretty==1.1.4
# via -r dev.in
icdiff==2.0.4
# via pytest-icdiff
idna==3.3

View file

@@ -1,5 +1,6 @@
import datetime
import os
from decimal import Decimal
from urllib.parse import urlparse, urljoin
import dj_database_url
@@ -449,7 +450,9 @@ MEDIA_ROOT = os.getenv("MEDIA_ROOT", "/baserow/media")
# Indicates the directory where the user files and user thumbnails are stored.
USER_FILES_DIRECTORY = "user_files"
USER_THUMBNAILS_DIRECTORY = "thumbnails"
USER_FILE_SIZE_LIMIT = 1024 * 1024 * 1024 * 1024 # ~1TB
BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = int(
Decimal(os.getenv("BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB", 1024 * 1024)) * 1024 * 1024
) # ~1TB by default
EXPORT_FILES_DIRECTORY = "export_files"
EXPORT_CLEANUP_INTERVAL_MINUTES = 5

View file

@@ -194,9 +194,10 @@ class UserFileHandler:
size = stream_size(stream)
if size > settings.USER_FILE_SIZE_LIMIT:
if size > settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
raise FileSizeTooLargeError(
settings.USER_FILE_SIZE_LIMIT, "The provided file is too large."
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB,
"The provided file is too large.",
)
storage = storage or default_storage
@@ -294,10 +295,26 @@ class UserFileHandler:
"The response did not respond with an " "OK status code."
)
content = response.raw.read(
settings.USER_FILE_SIZE_LIMIT + 1, decode_content=True
)
except (RequestException, UnacceptableAddressException):
try:
content_length = int(response.headers.get("Content-Length", ""))
if content_length > settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
raise FileSizeTooLargeError(
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB,
"The provided file is too large.",
)
except ValueError:
pass
content = b""
for chunk in response.iter_content(chunk_size=None):
content += chunk
if len(content) > settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
response.close()
raise FileSizeTooLargeError(
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB,
"The provided file is too large.",
)
except (RequestException, UnacceptableAddressException, ConnectionError):
raise FileURLCouldNotBeReached("The provided URL could not be reached.")
file = SimpleUploadedFile(file_name, content)

View file

@@ -1,15 +1,13 @@
import pytest
import responses
from unittest.mock import patch
from freezegun import freeze_time
import httpretty as httpretty
import pytest
from PIL import Image
from django.shortcuts import reverse
from django.conf import settings
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.files.storage import FileSystemStorage
from django.core.files.uploadedfile import SimpleUploadedFile
from django.shortcuts import reverse
from freezegun import freeze_time
from rest_framework.status import (
HTTP_200_OK,
HTTP_400_BAD_REQUEST,
@@ -42,15 +40,15 @@ def test_upload_file(api_client, data_fixture, tmpdir):
assert response.status_code == HTTP_400_BAD_REQUEST
assert response.json()["error"] == "ERROR_INVALID_FILE"
old_limit = settings.USER_FILE_SIZE_LIMIT
settings.USER_FILE_SIZE_LIMIT = 6
old_limit = settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = 6
response = api_client.post(
reverse("api:user_files:upload_file"),
data={"file": SimpleUploadedFile("test.txt", b"Hello World")},
format="multipart",
HTTP_AUTHORIZATION=f"JWT {token}",
)
settings.USER_FILE_SIZE_LIMIT = old_limit
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = old_limit
assert response.status_code == HTTP_413_REQUEST_ENTITY_TOO_LARGE
assert response.json()["error"] == "ERROR_FILE_SIZE_TOO_LARGE"
assert response.json()["detail"] == (
@@ -146,7 +144,7 @@ def test_upload_file(api_client, data_fixture, tmpdir):
@pytest.mark.django_db
@responses.activate
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_upload_file_via_url(api_client, data_fixture, tmpdir):
user, token = data_fixture.create_user_and_token(
email="test@test.nl", password="password", first_name="Test1"
@@ -168,6 +166,11 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
assert response.status_code == HTTP_400_BAD_REQUEST
assert response.json()["error"] == "ERROR_REQUEST_BODY_VALIDATION"
httpretty.register_uri(
httpretty.GET,
"https://baserow.io/test2.txt",
status=404,
)
response = api_client.post(
reverse("api:user_files:upload_via_url"),
data={"url": "https://baserow.io/test2.txt"},
@@ -185,17 +188,16 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
assert response.status_code == HTTP_400_BAD_REQUEST
assert response.json()["error"] == "ERROR_INVALID_FILE_URL"
responses.add(
responses.GET,
old_limit = settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = 6
httpretty.register_uri(
httpretty.GET,
"http://localhost/test.txt",
body=b"Hello World",
body="Hello World",
status=200,
content_type="text/plain",
stream=True,
)
old_limit = settings.USER_FILE_SIZE_LIMIT
settings.USER_FILE_SIZE_LIMIT = 6
response = api_client.post(
reverse("api:user_files:upload_via_url"),
data={"url": "http://localhost/test.txt"},
@@ -203,7 +205,26 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
)
assert response.status_code == HTTP_413_REQUEST_ENTITY_TOO_LARGE
assert response.json()["error"] == "ERROR_FILE_SIZE_TOO_LARGE"
settings.USER_FILE_SIZE_LIMIT = old_limit
# If the content length is not specified then when streaming down the file we will
# check the file size.
httpretty.register_uri(
httpretty.GET,
"http://localhost/test2.txt",
body="Hello World",
forcing_headers={"Content-Length": None},
status=200,
content_type="text/plain",
)
response = api_client.post(
reverse("api:user_files:upload_via_url"),
data={"url": "http://localhost/test2.txt"},
HTTP_AUTHORIZATION=f"JWT {token}",
)
assert response.status_code == HTTP_413_REQUEST_ENTITY_TOO_LARGE
assert response.json()["error"] == "ERROR_FILE_SIZE_TOO_LARGE"
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = old_limit
storage = FileSystemStorage(location=str(tmpdir), base_url="http://localhost")
@@ -215,7 +236,7 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
)
response_json = response.json()
assert response.status_code == HTTP_200_OK
assert response.status_code == HTTP_200_OK, response_json
assert response_json["size"] == 11
assert response_json["mime_type"] == "text/plain"
assert response_json["is_image"] is False

View file

@@ -1,14 +1,14 @@
import pytest
import responses
import string
from freezegun import freeze_time
from PIL import Image
from io import BytesIO
import httpretty
import pytest
import responses
from PIL import Image
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.files.storage import FileSystemStorage
from freezegun import freeze_time
from baserow.core.models import UserFile
from baserow.core.user_files.exceptions import (
@@ -89,11 +89,11 @@ def test_upload_user_file(data_fixture, tmpdir):
with pytest.raises(InvalidFileStreamError):
handler.upload_user_file(user, "test.txt", None, storage=storage)
old_limit = settings.USER_FILE_SIZE_LIMIT
settings.USER_FILE_SIZE_LIMIT = 6
old_limit = settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = 6
with pytest.raises(FileSizeTooLargeError):
handler.upload_user_file(user, "test.txt", ContentFile(b"Hello World"))
settings.USER_FILE_SIZE_LIMIT = old_limit
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = old_limit
with freeze_time("2020-01-01 12:00"):
user_file = handler.upload_user_file(
@@ -219,24 +219,26 @@ def test_upload_user_file(data_fixture, tmpdir):
@pytest.mark.django_db
@responses.activate
@httpretty.activate(verbose=True, allow_net_connect=False)
def test_upload_user_file_by_url(data_fixture, tmpdir):
user = data_fixture.create_user()
storage = FileSystemStorage(location=str(tmpdir), base_url="http://localhost")
handler = UserFileHandler()
responses.add(
responses.GET,
httpretty.register_uri(
httpretty.GET,
"https://baserow.io/test.txt",
body=b"Hello World",
status=200,
content_type="text/plain",
stream=True,
)
responses.add(
responses.GET,
)
httpretty.register_uri(
httpretty.GET,
"https://baserow.io/not-found.pdf",
status=404,
)

View file

@@ -34,6 +34,8 @@
to running celery with the same number of processes as the number of available cores.
* When the BASEROW_AMOUNT_OF_WORKERS env variable is set to blank, the amount of worker
processes defaults to the number of available cores.
* Fixed bug preventing file uploads via an url for self-hosters
* Added new environment variable BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
## Released (2022-10-05 1.10.0)

View file

@@ -156,6 +156,7 @@ x-common-backend-variables: &common-backend-variables
DISABLE_ANONYMOUS_PUBLIC_VIEW_WS_CONNECTIONS:
MEDIA_URL:
BASEROW_EXTRA_ALLOWED_HOSTS:
BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
BASEROW_COUNT_ROWS_ENABLED:
services:

View file

@@ -42,6 +42,9 @@ The installation methods referred to in the variable descriptions are:
| BASEROW\_BACKEND\_DEBUG | If set to “on” then will enable the non production safe debug mode for the Baserow django backend. Defaults to “off” | |
| BASEROW\_AMOUNT\_OF\_GUNICORN\_WORKERS | The number of concurrent worker processes used by the Baserow backend gunicorn server to process incoming requests | |
| BASEROW\_AIRTABLE\_IMPORT\_SOFT\_TIME\_LIMIT | The maximum amount of seconds an Airtable migration import job can run. | 1800 seconds - 30 minutes |
| INITIAL\_TABLE\_DATA\_LIMIT | The amount of rows that can be imported when creating a table. Defaults to empty which means unlimited rows. | |
| BASEROW\_ROW\_PAGE\_SIZE\_LIMIT | The maximum number of rows that can be requested at once. | 200 |
| BASEROW\_FILE\_UPLOAD\_SIZE\_LIMIT\_MB | The max file size in MB allowed to be uploaded by users into a Baserow File Field. | 1048576 (1 TB or 1024*1024) |
### Backend Database Configuration
| Name | Description | Defaults |
@@ -87,7 +90,6 @@ The installation methods referred to in the variable descriptions are:
| ------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| BASEROW\_ENABLE\_SECURE\_PROXY\_SSL\_HEADER | Set to any non-empty value to ensure Baserow generates https:// next links provided by paginated API endpoints. Baserow will still work correctly if not enabled, this is purely for giving the correct https url for clients of the API. If you have setup Baserow to use Caddy's auto HTTPS or you have put Baserow behind<br>a reverse proxy which:<br>* Handles HTTPS<br>* Strips the X-Forwarded-Proto header from all incoming requests.<br>* Sets the X-Forwarded-Proto header and sends it to Baserow.<br>Then you can safely set BASEROW\_ENABLE\_SECURE\_PROXY\_SSL\_HEADER=yes to ensure Baserow<br>generates https links for pagination correctly.<br> | |
| ADDITIONAL\_APPS | A comma separated list of additional django applications to add to the INSTALLED\_APPS django setting | |
| INITIAL\_TABLE\_DATA\_LIMIT | The amount of rows that can be imported when creating a table. Defaults to empty which means unlimited rows. | |
| HOURS\_UNTIL\_TRASH\_PERMANENTLY\_DELETED | Items from the trash will be permanently deleted after this number of hours. | |
| DISABLE\_ANONYMOUS\_PUBLIC\_VIEW\_WS\_CONNECTIONS | When sharing views publicly a websocket connection is opened to provide realtime updates to viewers of the public link. To disable this set any non empty value. When disabled publicly shared links will need to be refreshed to see any updates to the view. | |
| DJANGO\_SETTINGS\_MODULE | **INTERNAL** The settings python module to load when starting up the Backend django server. You shouldn't need to set this yourself unless you are customizing the settings manually. | |
@@ -95,7 +97,6 @@ The installation methods referred to in the variable descriptions are:
| BASEROW\_BACKEND\_BIND\_ADDRESS | **INTERNAL** The address that Baserow's backend service will bind to. | |
| BASEROW\_BACKEND\_PORT | **INTERNAL** Controls which port the Baserow backend service binds to. | |
| BASEROW\_WEBFRONTEND\_BIND\_ADDRESS | **INTERNAL** The address that Baserow's web-frontend service will bind to. | |
| BASEROW\_ROW\_PAGE\_SIZE\_LIMIT | The maximum number of rows that can be requested at once. | 200 |
### User file upload Configuration
| Name | Description | Defaults |