Source code for twisted.web.wsgi

# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
An implementation of
U{Python Web Server Gateway Interface v1.0.1<http://www.python.org/dev/peps/pep-3333/>}.
"""

from collections.abc import Sequence
from sys import exc_info
from typing import List, Union
from warnings import warn

from zope.interface import implementer

from twisted.internet.threads import blockingCallFromThread
from twisted.logger import Logger
from twisted.python.failure import Failure
from twisted.web.http import INTERNAL_SERVER_ERROR
from twisted.web.resource import IResource
from twisted.web.server import NOT_DONE_YET


# PEP-3333 -- which has superseded PEP-333 -- states that text strings MUST
# be represented using the platform's native string type, limited to
# characters defined in ISO-8859-1. Byte strings are used only for values
# read from wsgi.input, passed to write() or yielded by the application.
#
# Put another way:
#
# - All text strings are of type str, and all binary data are of
#   type bytes. Text MUST always be limited to that which can be encoded as
#   ISO-8859-1, U+0000 to U+00FF inclusive.
#
# The following pair of functions -- _wsgiString() and _wsgiStringToBytes() --
# are used to make Twisted's WSGI support compliant with the standard.
def _wsgiString(string: Union[str, bytes]) -> str:
    """
    Convert C{string} to a WSGI "bytes-as-unicode" string.

    If it's a byte string, decode as ISO-8859-1. If it's a Unicode string,
    round-trip it to bytes and back using ISO-8859-1 as the encoding.

    @type string: C{str} or C{bytes}
    @rtype: C{str}

    @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars.
    """
    if isinstance(string, str):
        return string.encode("iso-8859-1").decode("iso-8859-1")
    else:
        return string.decode("iso-8859-1")


def _wsgiStringToBytes(string: str) -> bytes:
    """
    Convert C{string} from a WSGI "bytes-as-unicode" string to an
    ISO-8859-1 byte string.

    @type string: C{str}
    @rtype: C{bytes}

    @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars.
    """
    return string.encode("iso-8859-1")


class _ErrorStream:
    """
    File-like object instances of which are used as the value for the
    C{'wsgi.errors'} key in the C{environ} dictionary passed to the application
    object.

    This simply passes writes on to L{logging<twisted.logger>} system as
    error events from the C{'wsgi'} system.  In the future, it may be desirable
    to expose more information in the events it logs, such as the application
    object which generated the message.
    """

    _log = Logger()

    def write(self, data: str) -> None:
        """
        Generate an event for the logging system with the given bytes as the
        message.

        This is called in a WSGI application thread, not the I/O thread.

        @type data: str

        @raise TypeError: if C{data} is not a native string.
        """
        if not isinstance(data, str):
            raise TypeError(
                "write() argument must be str, not %r (%s)"
                % (data, type(data).__name__)
            )

        # Note that in old style, message was a tuple.  logger._legacy
        # will overwrite this value if it is not properly formatted here.
        self._log.error(data, system="wsgi", isError=True, message=(data,))

    def writelines(self, iovec: List[str]) -> None:
        """
        Join the given lines and pass them to C{write} to be handled in the
        usual way.

        This is called in a WSGI application thread, not the I/O thread.

        @param iovec: A C{list} of C{'\\n'}-terminated C{str} which will be
            logged.

        @raise TypeError: if C{iovec} contains any non-native strings.
        """
        self.write("".join(iovec))

    def flush(self):
        """
        Nothing is buffered, so flushing does nothing.  This method is required
        to exist by PEP 333, though.

        This is called in a WSGI application thread, not the I/O thread.
        """


class _InputStream:
    """
    File-like object instances of which are used as the value for the
    C{'wsgi.input'} key in the C{environ} dictionary passed to the application
    object.

    This only exists to make the handling of C{readline(-1)} consistent across
    different possible underlying file-like object implementations.  The other
    supported methods pass through directly to the wrapped object.
    """

    def __init__(self, input):
        """
        Initialize the instance.

        This is called in the I/O thread, not a WSGI application thread.
        """
        self._wrapped = input

    def read(self, size=None):
        """
        Pass through to the underlying C{read}.

        This is called in a WSGI application thread, not the I/O thread.
        """
        # Avoid passing None because cStringIO and file don't like it.
        if size is None:
            return self._wrapped.read()
        return self._wrapped.read(size)

    def readline(self, size=None):
        """
        Pass through to the underlying C{readline}, with a size of C{-1} replaced
        with a size of L{None}.

        This is called in a WSGI application thread, not the I/O thread.
        """
        # Check for -1 because StringIO doesn't handle it correctly.  Check for
        # None because files and tempfiles don't accept that.
        if size == -1 or size is None:
            return self._wrapped.readline()
        return self._wrapped.readline(size)

    def readlines(self, size=None):
        """
        Pass through to the underlying C{readlines}.

        This is called in a WSGI application thread, not the I/O thread.
        """
        # Avoid passing None because cStringIO and file don't like it.
        if size is None:
            return self._wrapped.readlines()
        return self._wrapped.readlines(size)

    def __iter__(self):
        """
        Pass through to the underlying C{__iter__}.

        This is called in a WSGI application thread, not the I/O thread.
        """
        return iter(self._wrapped)


class _WSGIResponse:
    """
    Helper for L{WSGIResource} which drives the WSGI application using a
    threadpool and hooks it up to the L{http.Request}.

    @ivar started: A L{bool} indicating whether or not the response status and
        headers have been written to the request yet.  This may only be read or
        written in the WSGI application thread.

    @ivar reactor: An L{IReactorThreads} provider which is used to call methods
        on the request in the I/O thread.

    @ivar threadpool: A L{ThreadPool} which is used to call the WSGI
        application object in a non-I/O thread.

    @ivar application: The WSGI application object.

    @ivar request: The L{http.Request} upon which the WSGI environment is
        based and to which the application's output will be sent.

    @ivar environ: The WSGI environment L{dict}.

    @ivar status: The HTTP response status L{str} supplied to the WSGI
        I{start_response} callable by the application.

    @ivar headers: A list of HTTP response headers supplied to the WSGI
        I{start_response} callable by the application.

    @ivar _requestFinished: A flag which indicates whether it is possible to
        generate more response data or not.  This is L{False} until
        L{http.Request.notifyFinish} tells us the request is done,
        then L{True}.
    """

    _requestFinished = False
    _log = Logger()

    def __init__(self, reactor, threadpool, application, request):
        self.started = False
        self.reactor = reactor
        self.threadpool = threadpool
        self.application = application
        self.request = request
        self.request.notifyFinish().addBoth(self._finished)

        if request.prepath:
            scriptName = b"/" + b"/".join(request.prepath)
        else:
            scriptName = b""

        if request.postpath:
            pathInfo = b"/" + b"/".join(request.postpath)
        else:
            pathInfo = b""

        parts = request.uri.split(b"?", 1)
        if len(parts) == 1:
            queryString = b""
        else:
            queryString = parts[1]

        # All keys and values need to be native strings, i.e. of type str in
        # *both* Python 2 and Python 3, so says PEP-3333.
        remotePeer = request.getClientAddress()
        self.environ = {
            "REQUEST_METHOD": _wsgiString(request.method),
            "REMOTE_ADDR": _wsgiString(remotePeer.host),
            "REMOTE_PORT": _wsgiString(str(remotePeer.port)),
            "SCRIPT_NAME": _wsgiString(scriptName),
            "PATH_INFO": _wsgiString(pathInfo),
            "QUERY_STRING": _wsgiString(queryString),
            "CONTENT_TYPE": _wsgiString(request.getHeader(b"content-type") or ""),
            "CONTENT_LENGTH": _wsgiString(request.getHeader(b"content-length") or ""),
            "SERVER_NAME": _wsgiString(request.getRequestHostname()),
            "SERVER_PORT": _wsgiString(str(request.getHost().port)),
            "SERVER_PROTOCOL": _wsgiString(request.clientproto),
        }

        # The application object is entirely in control of response headers;
        # disable the default Content-Type value normally provided by
        # twisted.web.server.Request.
        self.request.defaultContentType = None

        for name, values in request.requestHeaders.getAllRawHeaders():
            name = "HTTP_" + _wsgiString(name).upper().replace("-", "_")
            # It might be preferable for http.HTTPChannel to clear out
            # newlines.
            self.environ[name] = ",".join(_wsgiString(v) for v in values).replace(
                "\n", " "
            )

        self.environ.update(
            {
                "wsgi.version": (1, 0),
                "wsgi.url_scheme": request.isSecure() and "https" or "http",
                "wsgi.run_once": False,
                "wsgi.multithread": True,
                "wsgi.multiprocess": False,
                "wsgi.errors": _ErrorStream(),
                # Attend: request.content was owned by the I/O thread up until
                # this point.  By wrapping it and putting the result into the
                # environment dictionary, it is effectively being given to
                # another thread.  This means that whatever it is, it has to be
                # safe to access it from two different threads.  The access
                # *should* all be serialized (first the I/O thread writes to
                # it, then the WSGI thread reads from it, then the I/O thread
                # closes it).  However, since the request is made available to
                # arbitrary application code during resource traversal, it's
                # possible that some other code might decide to use it in the
                # I/O thread concurrently with its use in the WSGI thread.
                # More likely than not, this will break.  This seems like an
                # unlikely possibility to me, but if it is to be allowed,
                # something here needs to change. -exarkun
                "wsgi.input": _InputStream(request.content),
            }
        )

    def _finished(self, ignored):
        """
        Record the end of the response generation for the request being
        serviced.
        """
        self._requestFinished = True

    def startResponse(self, status, headers, excInfo=None):
        """
        The WSGI I{start_response} callable.  The given values are saved until
        they are needed to generate the response.

        This will be called in a non-I/O thread.
        """
        if self.started and excInfo is not None:
            raise excInfo[1].with_traceback(excInfo[2])

        # PEP-3333 mandates that status should be a native string. In practice
        # this is mandated by Twisted's HTTP implementation too.
        if not isinstance(status, str):
            raise TypeError(
                "status must be str, not {!r} ({})".format(
                    status, type(status).__name__
                )
            )

        # PEP-3333 mandates that headers should be a plain list, but in
        # practice we work with any sequence type and only warn when it's not
        # a plain list.
        if isinstance(headers, list):
            pass  # This is okay.
        elif isinstance(headers, Sequence):
            warn(
                "headers should be a list, not %r (%s)"
                % (headers, type(headers).__name__),
                category=RuntimeWarning,
            )
        else:
            raise TypeError(
                "headers must be a list, not %r (%s)"
                % (headers, type(headers).__name__)
            )

        # PEP-3333 mandates that each header should be a (str, str) tuple, but
        # in practice we work with any sequence type and only warn when it's
        # not a plain list.
        for header in headers:
            if isinstance(header, tuple):
                pass  # This is okay.
            elif isinstance(header, Sequence):
                warn(
                    "header should be a (str, str) tuple, not %r (%s)"
                    % (header, type(header).__name__),
                    category=RuntimeWarning,
                )
            else:
                raise TypeError(
                    "header must be a (str, str) tuple, not %r (%s)"
                    % (header, type(header).__name__)
                )

            # However, the sequence MUST contain only 2 elements.
            if len(header) != 2:
                raise TypeError(f"header must be a (str, str) tuple, not {header!r}")

            # Both elements MUST be native strings. Non-native strings will be
            # rejected by the underlying HTTP machinery in any case, but we
            # reject them here in order to provide a more informative error.
            for elem in header:
                if not isinstance(elem, str):
                    raise TypeError(f"header must be (str, str) tuple, not {header!r}")

        self.status = status
        self.headers = headers
        return self.write

    def write(self, data):
        """
        The WSGI I{write} callable returned by the I{start_response} callable.
        The given bytes will be written to the response body, possibly flushing
        the status and headers first.

        This will be called in a non-I/O thread.
        """
        # PEP-3333 states:
        #
        #   The server or gateway must transmit the yielded bytestrings to the
        #   client in an unbuffered fashion, completing the transmission of
        #   each bytestring before requesting another one.
        #
        # This write() method is used for the imperative and (indirectly) for
        # the more familiar iterable-of-bytestrings WSGI mechanism. It uses
        # C{blockingCallFromThread} to schedule writes. This allows exceptions
        # to propagate up from the underlying HTTP implementation. However,
        # that underlying implementation does not, as yet, provide any way to
        # know if the written data has been transmitted, so this method
        # violates the above part of PEP-3333.
        #
        # PEP-3333 also says that a server may:
        #
        #   Use a different thread to ensure that the block continues to be
        #   transmitted while the application produces the next block.
        #
        # Which suggests that this is actually compliant with PEP-3333,
        # because writes are done in the reactor thread.
        #
        # However, providing some back-pressure may nevertheless be a Good
        # Thing at some point in the future.

        def wsgiWrite(started):
            if not started:
                self._sendResponseHeaders()
            self.request.write(data)

        try:
            return blockingCallFromThread(self.reactor, wsgiWrite, self.started)
        finally:
            self.started = True

    def _sendResponseHeaders(self):
        """
        Set the response code and response headers on the request object, but
        do not flush them.  The caller is responsible for doing a write in
        order for anything to actually be written out in response to the
        request.

        This must be called in the I/O thread.
        """
        code, message = self.status.split(None, 1)
        code = int(code)
        self.request.setResponseCode(code, _wsgiStringToBytes(message))

        for name, value in self.headers:
            # Don't allow the application to control these required headers.
            if name.lower() not in ("server", "date"):
                self.request.responseHeaders.addRawHeader(
                    _wsgiStringToBytes(name), _wsgiStringToBytes(value)
                )

    def start(self):
        """
        Start the WSGI application in the threadpool.

        This must be called in the I/O thread.
        """
        self.threadpool.callInThread(self.run)

    def run(self):
        """
        Call the WSGI application object, iterate it, and handle its output.

        This must be called in a non-I/O thread (ie, a WSGI application
        thread).
        """
        try:
            appIterator = self.application(self.environ, self.startResponse)
            for elem in appIterator:
                if elem:
                    self.write(elem)
                if self._requestFinished:
                    break
            close = getattr(appIterator, "close", None)
            if close is not None:
                close()
        except BaseException:

            def wsgiError(started, type, value, traceback):
                self._log.failure(
                    "WSGI application error", failure=Failure(value, type, traceback)
                )
                if started:
                    self.request.loseConnection()
                else:
                    self.request.setResponseCode(INTERNAL_SERVER_ERROR)
                    self.request.finish()

            self.reactor.callFromThread(wsgiError, self.started, *exc_info())
        else:

            def wsgiFinish(started):
                if not self._requestFinished:
                    if not started:
                        self._sendResponseHeaders()
                    self.request.finish()

            self.reactor.callFromThread(wsgiFinish, self.started)
        self.started = True


[docs] @implementer(IResource) class WSGIResource: """ An L{IResource} implementation which delegates responsibility for all resources hierarchically inferior to it to a WSGI application. The C{environ} argument passed to the application, includes the C{REMOTE_PORT} key to complement the C{REMOTE_ADDR} key. @ivar _reactor: An L{IReactorThreads} provider which will be passed on to L{_WSGIResponse} to schedule calls in the I/O thread. @ivar _threadpool: A L{ThreadPool} which will be passed on to L{_WSGIResponse} to run the WSGI application object. @ivar _application: The WSGI application object. """ # Further resource segments are left up to the WSGI application object to # handle. isLeaf = True
[docs] def __init__(self, reactor, threadpool, application): self._reactor = reactor self._threadpool = threadpool self._application = application
[docs] def render(self, request): """ Turn the request into the appropriate C{environ} C{dict} suitable to be passed to the WSGI application object and then pass it on. The WSGI application object is given almost complete control of the rendering process. C{NOT_DONE_YET} will always be returned in order and response completion will be dictated by the application object, as will the status, headers, and the response body. """ response = _WSGIResponse( self._reactor, self._threadpool, self._application, request ) response.start() return NOT_DONE_YET
[docs] def getChildWithDefault(self, name, request): """ Reject attempts to retrieve a child resource. All path segments beyond the one which refers to this resource are handled by the WSGI application object. """ raise RuntimeError("Cannot get IResource children from WSGIResource")
[docs] def putChild(self, path, child): """ Reject attempts to add a child resource to this resource. The WSGI application object handles all path segments beneath this resource, so L{IResource} children can never be found. """ raise RuntimeError("Cannot put IResource children under WSGIResource")
__all__ = ["WSGIResource"]