mirror of
https://gitlab.com/bramw/baserow.git
synced 2025-04-11 07:51:20 +00:00
Merge branch '997-fix-uploading-files-via-url' into 'develop'
Resolve "Fix uploading files via url by default crashing due to MemoryError" Closes #997 See merge request bramw/baserow!782
This commit is contained in:
commit
db253b1e2a
9 changed files with 91 additions and 41 deletions
|
@ -29,3 +29,4 @@ autopep8==1.5.7
|
|||
pytest-unordered==0.4.1
|
||||
debugpy==1.5.1
|
||||
backports.cached-property==1.0.1
|
||||
httpretty==1.1.4
|
|
@ -71,6 +71,8 @@ gitpython==3.1.27
|
|||
# via bandit
|
||||
gprof2dot==2021.2.21
|
||||
# via django-silk
|
||||
httpretty==1.1.4
|
||||
# via -r dev.in
|
||||
icdiff==2.0.4
|
||||
# via pytest-icdiff
|
||||
idna==3.3
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import datetime
|
||||
import os
|
||||
from decimal import Decimal
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
import dj_database_url
|
||||
|
@ -449,7 +450,9 @@ MEDIA_ROOT = os.getenv("MEDIA_ROOT", "/baserow/media")
|
|||
# Indicates the directory where the user files and user thumbnails are stored.
|
||||
USER_FILES_DIRECTORY = "user_files"
|
||||
USER_THUMBNAILS_DIRECTORY = "thumbnails"
|
||||
USER_FILE_SIZE_LIMIT = 1024 * 1024 * 1024 * 1024 # ~1TB
|
||||
BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = int(
|
||||
Decimal(os.getenv("BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB", 1024 * 1024)) * 1024 * 1024
|
||||
) # ~1TB by default
|
||||
|
||||
EXPORT_FILES_DIRECTORY = "export_files"
|
||||
EXPORT_CLEANUP_INTERVAL_MINUTES = 5
|
||||
|
|
|
@ -194,9 +194,10 @@ class UserFileHandler:
|
|||
|
||||
size = stream_size(stream)
|
||||
|
||||
if size > settings.USER_FILE_SIZE_LIMIT:
|
||||
if size > settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
|
||||
raise FileSizeTooLargeError(
|
||||
settings.USER_FILE_SIZE_LIMIT, "The provided file is too large."
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB,
|
||||
"The provided file is too large.",
|
||||
)
|
||||
|
||||
storage = storage or default_storage
|
||||
|
@ -294,10 +295,26 @@ class UserFileHandler:
|
|||
"The response did not respond with an " "OK status code."
|
||||
)
|
||||
|
||||
content = response.raw.read(
|
||||
settings.USER_FILE_SIZE_LIMIT + 1, decode_content=True
|
||||
)
|
||||
except (RequestException, UnacceptableAddressException):
|
||||
try:
|
||||
content_length = int(response.headers.get("Content-Length", ""))
|
||||
if content_length > settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
|
||||
raise FileSizeTooLargeError(
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB,
|
||||
"The provided file is too large.",
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
content = b""
|
||||
for chunk in response.iter_content(chunk_size=None):
|
||||
content += chunk
|
||||
if len(content) > settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
|
||||
response.close()
|
||||
raise FileSizeTooLargeError(
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB,
|
||||
"The provided file is too large.",
|
||||
)
|
||||
except (RequestException, UnacceptableAddressException, ConnectionError):
|
||||
raise FileURLCouldNotBeReached("The provided URL could not be reached.")
|
||||
|
||||
file = SimpleUploadedFile(file_name, content)
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
import pytest
|
||||
import responses
|
||||
from unittest.mock import patch
|
||||
from freezegun import freeze_time
|
||||
|
||||
import httpretty as httpretty
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
from django.shortcuts import reverse
|
||||
from django.conf import settings
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.core.files.storage import FileSystemStorage
|
||||
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.shortcuts import reverse
|
||||
from freezegun import freeze_time
|
||||
from rest_framework.status import (
|
||||
HTTP_200_OK,
|
||||
HTTP_400_BAD_REQUEST,
|
||||
|
@ -42,15 +40,15 @@ def test_upload_file(api_client, data_fixture, tmpdir):
|
|||
assert response.status_code == HTTP_400_BAD_REQUEST
|
||||
assert response.json()["error"] == "ERROR_INVALID_FILE"
|
||||
|
||||
old_limit = settings.USER_FILE_SIZE_LIMIT
|
||||
settings.USER_FILE_SIZE_LIMIT = 6
|
||||
old_limit = settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = 6
|
||||
response = api_client.post(
|
||||
reverse("api:user_files:upload_file"),
|
||||
data={"file": SimpleUploadedFile("test.txt", b"Hello World")},
|
||||
format="multipart",
|
||||
HTTP_AUTHORIZATION=f"JWT {token}",
|
||||
)
|
||||
settings.USER_FILE_SIZE_LIMIT = old_limit
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = old_limit
|
||||
assert response.status_code == HTTP_413_REQUEST_ENTITY_TOO_LARGE
|
||||
assert response.json()["error"] == "ERROR_FILE_SIZE_TOO_LARGE"
|
||||
assert response.json()["detail"] == (
|
||||
|
@ -146,7 +144,7 @@ def test_upload_file(api_client, data_fixture, tmpdir):
|
|||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@responses.activate
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_upload_file_via_url(api_client, data_fixture, tmpdir):
|
||||
user, token = data_fixture.create_user_and_token(
|
||||
email="test@test.nl", password="password", first_name="Test1"
|
||||
|
@ -168,6 +166,11 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
|
|||
assert response.status_code == HTTP_400_BAD_REQUEST
|
||||
assert response.json()["error"] == "ERROR_REQUEST_BODY_VALIDATION"
|
||||
|
||||
httpretty.register_uri(
|
||||
httpretty.GET,
|
||||
"https://baserow.io/test2.txt",
|
||||
status=404,
|
||||
)
|
||||
response = api_client.post(
|
||||
reverse("api:user_files:upload_via_url"),
|
||||
data={"url": "https://baserow.io/test2.txt"},
|
||||
|
@ -185,17 +188,16 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
|
|||
assert response.status_code == HTTP_400_BAD_REQUEST
|
||||
assert response.json()["error"] == "ERROR_INVALID_FILE_URL"
|
||||
|
||||
responses.add(
|
||||
responses.GET,
|
||||
old_limit = settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = 6
|
||||
|
||||
httpretty.register_uri(
|
||||
httpretty.GET,
|
||||
"http://localhost/test.txt",
|
||||
body=b"Hello World",
|
||||
body="Hello World",
|
||||
status=200,
|
||||
content_type="text/plain",
|
||||
stream=True,
|
||||
)
|
||||
|
||||
old_limit = settings.USER_FILE_SIZE_LIMIT
|
||||
settings.USER_FILE_SIZE_LIMIT = 6
|
||||
response = api_client.post(
|
||||
reverse("api:user_files:upload_via_url"),
|
||||
data={"url": "http://localhost/test.txt"},
|
||||
|
@ -203,7 +205,26 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
|
|||
)
|
||||
assert response.status_code == HTTP_413_REQUEST_ENTITY_TOO_LARGE
|
||||
assert response.json()["error"] == "ERROR_FILE_SIZE_TOO_LARGE"
|
||||
settings.USER_FILE_SIZE_LIMIT = old_limit
|
||||
|
||||
# If the content length is not specified then when streaming down the file we will
|
||||
# check the file size.
|
||||
httpretty.register_uri(
|
||||
httpretty.GET,
|
||||
"http://localhost/test2.txt",
|
||||
body="Hello World",
|
||||
forcing_headers={"Content-Length": None},
|
||||
status=200,
|
||||
content_type="text/plain",
|
||||
)
|
||||
response = api_client.post(
|
||||
reverse("api:user_files:upload_via_url"),
|
||||
data={"url": "http://localhost/test2.txt"},
|
||||
HTTP_AUTHORIZATION=f"JWT {token}",
|
||||
)
|
||||
assert response.status_code == HTTP_413_REQUEST_ENTITY_TOO_LARGE
|
||||
assert response.json()["error"] == "ERROR_FILE_SIZE_TOO_LARGE"
|
||||
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = old_limit
|
||||
|
||||
storage = FileSystemStorage(location=str(tmpdir), base_url="http://localhost")
|
||||
|
||||
|
@ -215,7 +236,7 @@ def test_upload_file_via_url(api_client, data_fixture, tmpdir):
|
|||
)
|
||||
response_json = response.json()
|
||||
|
||||
assert response.status_code == HTTP_200_OK
|
||||
assert response.status_code == HTTP_200_OK, response_json
|
||||
assert response_json["size"] == 11
|
||||
assert response_json["mime_type"] == "text/plain"
|
||||
assert response_json["is_image"] is False
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
import pytest
|
||||
import responses
|
||||
import string
|
||||
|
||||
from freezegun import freeze_time
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
import httpretty
|
||||
import pytest
|
||||
import responses
|
||||
from PIL import Image
|
||||
from django.conf import settings
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import FileSystemStorage
|
||||
from freezegun import freeze_time
|
||||
|
||||
from baserow.core.models import UserFile
|
||||
from baserow.core.user_files.exceptions import (
|
||||
|
@ -89,11 +89,11 @@ def test_upload_user_file(data_fixture, tmpdir):
|
|||
with pytest.raises(InvalidFileStreamError):
|
||||
handler.upload_user_file(user, "test.txt", None, storage=storage)
|
||||
|
||||
old_limit = settings.USER_FILE_SIZE_LIMIT
|
||||
settings.USER_FILE_SIZE_LIMIT = 6
|
||||
old_limit = settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = 6
|
||||
with pytest.raises(FileSizeTooLargeError):
|
||||
handler.upload_user_file(user, "test.txt", ContentFile(b"Hello World"))
|
||||
settings.USER_FILE_SIZE_LIMIT = old_limit
|
||||
settings.BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB = old_limit
|
||||
|
||||
with freeze_time("2020-01-01 12:00"):
|
||||
user_file = handler.upload_user_file(
|
||||
|
@ -219,24 +219,26 @@ def test_upload_user_file(data_fixture, tmpdir):
|
|||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@responses.activate
|
||||
@httpretty.activate(verbose=True, allow_net_connect=False)
|
||||
def test_upload_user_file_by_url(data_fixture, tmpdir):
|
||||
user = data_fixture.create_user()
|
||||
|
||||
storage = FileSystemStorage(location=str(tmpdir), base_url="http://localhost")
|
||||
handler = UserFileHandler()
|
||||
|
||||
responses.add(
|
||||
responses.GET,
|
||||
httpretty.register_uri(
|
||||
httpretty.GET,
|
||||
"https://baserow.io/test.txt",
|
||||
body=b"Hello World",
|
||||
status=200,
|
||||
content_type="text/plain",
|
||||
stream=True,
|
||||
)
|
||||
|
||||
responses.add(
|
||||
responses.GET,
|
||||
)
|
||||
httpretty.register_uri(
|
||||
httpretty.GET,
|
||||
"https://baserow.io/not-found.pdf",
|
||||
status=404,
|
||||
)
|
||||
|
|
|
@ -34,6 +34,8 @@
|
|||
to running celery with the same number of processes as the number of available cores.
|
||||
* When the BASEROW_AMOUNT_OF_WORKERS env variable is set to blank, the amount of worker
|
||||
processes defaults to the number of available cores.
|
||||
* Fixed a bug preventing file uploads via a URL for self-hosters
|
||||
* Added a new environment variable BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB to configure the maximum file upload size in MB
|
||||
|
||||
## Released (2022-10-05 1.10.0)
|
||||
|
||||
|
|
|
@ -156,6 +156,7 @@ x-common-backend-variables: &common-backend-variables
|
|||
DISABLE_ANONYMOUS_PUBLIC_VIEW_WS_CONNECTIONS:
|
||||
MEDIA_URL:
|
||||
BASEROW_EXTRA_ALLOWED_HOSTS:
|
||||
BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB:
|
||||
BASEROW_COUNT_ROWS_ENABLED:
|
||||
|
||||
services:
|
||||
|
|
|
@ -42,6 +42,9 @@ The installation methods referred to in the variable descriptions are:
|
|||
| BASEROW\_BACKEND\_DEBUG | If set to “on” then will enable the non production safe debug mode for the Baserow django backend. Defaults to “off” | |
|
||||
| BASEROW\_AMOUNT\_OF\_GUNICORN\_WORKERS | The number of concurrent worker processes used by the Baserow backend gunicorn server to process incoming requests
|
||||
| BASEROW\_AIRTABLE\_IMPORT\_SOFT\_TIME\_LIMIT | The maximum amount of seconds an Airtable migration import job can run. | 1800 seconds - 30 minutes |
|
||||
| INITIAL\_TABLE\_DATA\_LIMIT | The amount of rows that can be imported when creating a table. Defaults to empty which means unlimited rows. | |
|
||||
| BASEROW\_ROW\_PAGE\_SIZE\_LIMIT | The maximum number of rows that can be requested at once. | 200 |
|
||||
| BASEROW\_FILE\_UPLOAD\_SIZE\_LIMIT\_MB | The maximum size in MB of a file that users are allowed to upload into a Baserow File Field. | 1048576 (1024 \* 1024 MB, i.e. 1 TB) |
|
||||
|
||||
### Backend Database Configuration
|
||||
| Name | Description | Defaults |
|
||||
|
@ -87,7 +90,6 @@ The installation methods referred to in the variable descriptions are:
|
|||
| ------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| BASEROW\_ENABLE\_SECURE\_PROXY\_SSL\_HEADER | Set to any non-empty value to ensure Baserow generates https:// next links provided by paginated API endpoints. Baserow will still work correctly if not enabled, this is purely for giving the correct https url for clients of the API. If you have setup Baserow to use Caddy's auto HTTPS or you have put Baserow behind<br>a reverse proxy which:<br>* Handles HTTPS<br>* Strips the X-Forwarded-Proto header from all incoming requests.<br>* Sets the X-Forwarded-Proto header and sends it to Baserow.<br>Then you can safely set BASEROW\_ENABLE\_SECURE\_PROXY\_SSL\_HEADER=yes to ensure Baserow<br>generates https links for pagination correctly.<br> | |
|
||||
| ADDITIONAL\_APPS | A comma separated list of additional django applications to add to the INSTALLED\_APPS django setting | |
|
||||
| INITIAL\_TABLE\_DATA\_LIMIT | The amount of rows that can be imported when creating a table. Defaults to empty which means unlimited rows. | |
|
||||
| HOURS\_UNTIL\_TRASH\_PERMANENTLY\_DELETED | Items from the trash will be permanently deleted after this number of hours. | |
|
||||
| DISABLE\_ANONYMOUS\_PUBLIC\_VIEW\_WS\_CONNECTIONS | When sharing views publicly a websocket connection is opened to provide realtime updates to viewers of the public link. To disable this set any non empty value. When disabled publicly shared links will need to be refreshed to see any updates to the view. | |
|
||||
| DJANGO\_SETTINGS\_MODULE | **INTERNAL** The settings python module to load when starting up the Backend django server. You shouldn’t need to set this yourself unless you are customizing the settings manually. | |
|
||||
|
@ -95,7 +97,6 @@ The installation methods referred to in the variable descriptions are:
|
|||
| BASEROW\_BACKEND\_BIND\_ADDRESS | **INTERNAL** The address that Baserow’s backend service will bind to. | |
|
||||
| BASEROW\_BACKEND\_PORT | **INTERNAL** Controls which port the Baserow backend service binds to. | |
|
||||
| BASEROW\_WEBFRONTEND\_BIND\_ADDRESS | **INTERNAL** The address that Baserow’s web-frontend service will bind to. | |
|
||||
| BASEROW\_ROW\_PAGE\_SIZE\_LIMIT | The maximum number of rows that can be requested at once. | 200 |
|
||||
|
||||
### User file upload Configuration
|
||||
| Name | Description | Defaults |
|
||||
|
|
Loading…
Add table
Reference in a new issue