matrix-org_synapse/synapse/util/file_consumer.py

159 lines
6.1 KiB
Python

# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import queue
from typing import Any, BinaryIO, Optional, Union, cast
from twisted.internet import threads
from twisted.internet.defer import Deferred
from twisted.internet.interfaces import IPullProducer, IPushProducer
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.types import ISynapseReactor
class BackgroundFileConsumer:
"""A consumer that writes to a file like object. Supports both push
and pull producers
Args:
file_obj: The file like object to write to. Closed when
finished.
reactor: the Twisted reactor to use
"""
# For PushProducers pause if we have this many unwritten slices
_PAUSE_ON_QUEUE_SIZE = 5
# And resume once the size of the queue is less than this
_RESUME_ON_QUEUE_SIZE = 2
def __init__(self, file_obj: BinaryIO, reactor: ISynapseReactor) -> None:
self._file_obj: BinaryIO = file_obj
self._reactor: ISynapseReactor = reactor
# Producer we're registered with
self._producer: Optional[Union[IPushProducer, IPullProducer]] = None
# True if PushProducer, false if PullProducer
self.streaming = False
# For PushProducers, indicates whether we've paused the producer and
# need to call resumeProducing before we get more data.
self._paused_producer = False
# Queue of slices of bytes to be written. When producer calls
# unregister a final None is sent.
self._bytes_queue: queue.Queue[Optional[bytes]] = queue.Queue()
# Deferred that is resolved when finished writing
#
# This is really Deferred[None], but mypy doesn't seem to like that.
self._finished_deferred: Optional[Deferred[Any]] = None
# If the _writer thread throws an exception it gets stored here.
self._write_exception: Optional[Exception] = None
def registerProducer(
self, producer: Union[IPushProducer, IPullProducer], streaming: bool
) -> None:
"""Part of IConsumer interface
Args:
producer
streaming: True if push based producer, False if pull
based.
"""
if self._producer:
raise Exception("registerProducer called twice")
self._producer = producer
self.streaming = streaming
self._finished_deferred = run_in_background(
threads.deferToThreadPool,
# mypy seems to get confused with the chaining of ParamSpec from
# run_in_background to deferToThreadPool.
#
# For Twisted trunk, ignore arg-type; for Twisted release ignore unused-ignore.
self._reactor, # type: ignore[arg-type,unused-ignore]
self._reactor.getThreadPool(), # type: ignore[arg-type,unused-ignore]
self._writer, # type: ignore[arg-type,unused-ignore]
)
if not streaming:
self._producer.resumeProducing()
def unregisterProducer(self) -> None:
"""Part of IProducer interface"""
self._producer = None
assert self._finished_deferred is not None
if not self._finished_deferred.called:
self._bytes_queue.put_nowait(None)
def write(self, write_bytes: bytes) -> None:
"""Part of IProducer interface"""
if self._write_exception:
raise self._write_exception
assert self._finished_deferred is not None
if self._finished_deferred.called:
raise Exception("consumer has closed")
self._bytes_queue.put_nowait(write_bytes)
# If this is a PushProducer and the queue is getting behind
# then we pause the producer.
if self.streaming and self._bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE:
self._paused_producer = True
assert self._producer is not None
# cast safe because `streaming` means this is an IPushProducer
cast(IPushProducer, self._producer).pauseProducing()
def _writer(self) -> None:
"""This is run in a background thread to write to the file."""
try:
while self._producer or not self._bytes_queue.empty():
# If we've paused the producer check if we should resume the
# producer.
if self._producer and self._paused_producer:
if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE:
self._reactor.callFromThread(self._resume_paused_producer)
bytes = self._bytes_queue.get()
# If we get a None (or empty list) then that's a signal used
# to indicate we should check if we should stop.
if bytes:
self._file_obj.write(bytes)
# If its a pull producer then we need to explicitly ask for
# more stuff.
if not self.streaming and self._producer:
self._reactor.callFromThread(self._producer.resumeProducing)
except Exception as e:
self._write_exception = e
raise
finally:
self._file_obj.close()
def wait(self) -> "Deferred[None]":
"""Returns a deferred that resolves when finished writing to file"""
assert self._finished_deferred is not None
return make_deferred_yieldable(self._finished_deferred)
def _resume_paused_producer(self) -> None:
"""Gets called if we should resume producing after being paused"""
if self._paused_producer and self._producer:
self._paused_producer = False
self._producer.resumeProducing()