📝 update database docs and improve cookie parsing robustness

This commit is contained in:
Joe Kaufeld 2025-10-11 21:49:15 -04:00
parent b0e69727e2
commit 66a5f81230
5 changed files with 113 additions and 19 deletions

View file

@ -62,11 +62,12 @@ Spiderweb provides five types of responses out of the box:
### Database Agnosticism (Mostly) ### Database Agnosticism (Mostly)
One of the largest selling points of Django is the Django Object Relational Mapper (ORM); while there's nothing that compares to it in functionality, there are many other ORMs and database management solutions for developers to choose from. Spiderweb persists its internal data (like sessions) using Advanced Alchemy built on SQLAlchemy. Your application can use this same setup out of the box, or bring any ORM you prefer.
In order to use a database internally (and since this is not about writing an ORM too), Spiderweb depends on [peewee, a small ORM](https://github.com/coleifer/peewee). Applications using Spiderweb are more than welcome to use peewee models with first-class support or use whatever they're familiar with. Peewee supports PostgreSQL, MySQL, Sqlite, and CockroachDB; if you use one of these, Spiderweb can create the tables it needs in your database and stay out of the way. By default, Spiderweb creates a sqlite database in the application directory for its own use. - By default, Spiderweb creates a SQLite database file `spiderweb.db` next to your app.
- You can pass the `db` argument to `SpiderwebRouter` as a filesystem path (for SQLite), a SQLAlchemy database URL string, or a SQLAlchemy Engine instance.
> [Read more about the using a database in Spiderweb](db.md) > [Read more about databases and migrations](db.md)
### Easy to configure ### Easy to configure
@ -129,4 +130,4 @@ Here's a non-exhaustive list of things this can do:
- Database support (using Peewee, but you can use whatever you want as long as there's a Peewee driver for it) - Database support (using Peewee, but you can use whatever you want as long as there's a Peewee driver for it)
- Tests (currently a little over 80% coverage) - Tests (currently a little over 80% coverage)
[^1]: I mostly succeeded. The way that I'm approaching this is that I did my level best, then looked at (and copied) existing solutions where necessary. At the time of this writing, I did all of it solo except for the CORS middleware. [Read more about it here.](middleware/cors.md) [^1]: I mostly succeeded. The way that I'm approaching this is that I did my level best, then looked at (and copied) existing solutions where necessary. At the time of this writing, I did all of it solo except for the CORS middleware. [Read more about it here.](middleware/cors.md)

View file

@ -48,12 +48,14 @@ def form(request):
app = SpiderwebRouter( app = SpiderwebRouter(
templates_dirs=["templates"], templates_dirs=["templates"],
middleware=[ middleware=[
"spiderweb.middleware.sessions.SessionMiddleware",
"spiderweb.middleware.csrf.CSRFMiddleware", "spiderweb.middleware.csrf.CSRFMiddleware",
"example_middleware.TestMiddleware", "example_middleware.TestMiddleware",
"example_middleware.RedirectMiddleware", "example_middleware.RedirectMiddleware",
"example_middleware.ExplodingMiddleware", "example_middleware.ExplodingMiddleware",
], ],
staticfiles_dirs=["static_files"], staticfiles_dirs=["static_files"],
debug=True,
routes=[ routes=[
("/", index), ("/", index),
("/redirect", redirect), ("/redirect", redirect),

View file

@ -10,7 +10,8 @@ from typing import Optional, Callable, Sequence, Literal
from wsgiref.simple_server import WSGIServer from wsgiref.simple_server import WSGIServer
from jinja2 import BaseLoader, FileSystemLoader from jinja2 import BaseLoader, FileSystemLoader
from peewee import Database, SqliteDatabase from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from spiderweb.middleware import MiddlewareMixin from spiderweb.middleware import MiddlewareMixin
from spiderweb.constants import ( from spiderweb.constants import (
@ -18,11 +19,10 @@ from spiderweb.constants import (
DEFAULT_CORS_ALLOW_HEADERS, DEFAULT_CORS_ALLOW_HEADERS,
) )
from spiderweb.constants import ( from spiderweb.constants import (
DATABASE_PROXY,
DEFAULT_ENCODING, DEFAULT_ENCODING,
DEFAULT_ALLOWED_METHODS, DEFAULT_ALLOWED_METHODS,
) )
from spiderweb.db import SpiderwebModel from spiderweb.db import Base, create_sqlite_engine, create_session_factory
from spiderweb.default_views import ( from spiderweb.default_views import (
http403, # noqa: F401 http403, # noqa: F401
http404, # noqa: F401 http404, # noqa: F401
@ -67,7 +67,7 @@ class SpiderwebRouter(LocalServerMixin, MiddlewareMixin, RoutesMixin, FernetMixi
cors_allow_credentials: bool = False, cors_allow_credentials: bool = False,
cors_allow_private_network: bool = False, cors_allow_private_network: bool = False,
csrf_trusted_origins: Sequence[str] = None, csrf_trusted_origins: Sequence[str] = None,
db: Optional[Database] = None, db: Optional[Engine | str] = None,
debug: bool = False, debug: bool = False,
gzip_compression_level: int = 6, gzip_compression_level: int = 6,
gzip_minimum_response_length: int = 500, gzip_minimum_response_length: int = 500,
@ -148,12 +148,21 @@ class SpiderwebRouter(LocalServerMixin, MiddlewareMixin, RoutesMixin, FernetMixi
self.init_fernet() self.init_fernet()
self.init_middleware() self.init_middleware()
self.db = db or SqliteDatabase(self.BASE_DIR / "spiderweb.db") # Database setup (SQLAlchemy)
# give the models the db connection if isinstance(db, Engine):
DATABASE_PROXY.initialize(self.db) self.db_engine = db
self.db.create_tables(SpiderwebModel.__subclasses__()) elif isinstance(db, str):
for model in SpiderwebModel.__subclasses__(): # treat as URL if it looks like one, otherwise as a filesystem path
model.check_for_needed_migration() if "://" in db:
self.db_engine = create_engine(db, future=True)
else:
self.db_engine = create_sqlite_engine(self.BASE_DIR / db)
else:
self.db_engine = create_sqlite_engine(self.BASE_DIR / "spiderweb.db")
self.db_session_factory = create_session_factory(self.db_engine)
# Create internal tables (e.g., sessions)
Base.metadata.create_all(self.db_engine)
if self.routes: if self.routes:
self.add_routes() self.add_routes()
@ -265,6 +274,10 @@ class SpiderwebRouter(LocalServerMixin, MiddlewareMixin, RoutesMixin, FernetMixi
server=self, server=self,
) )
def get_db_session(self):
    """Create a new SQLAlchemy session from the application's session factory.

    Each call invokes ``self.db_session_factory`` and hands back whatever
    it produces.
    """
    session = self.db_session_factory()
    return session
def send_error_response( def send_error_response(
self, start_response, request: Request, e: SpiderwebNetworkException self, start_response, request: Request, e: SpiderwebNetworkException
): ):

View file

@ -80,11 +80,21 @@ class Request:
self.META["client_address"] = get_client_address(self.environ) self.META["client_address"] = get_client_address(self.environ)
def populate_cookies(self) -> None:
    """Parse the ``HTTP_COOKIE`` header from ``self.environ`` into ``self.COOKIES``.

    The parser is deliberately tolerant of malformed input:

    * segments are separated by ``;`` with optional surrounding whitespace;
    * empty segments and flag-like segments without ``=`` are ignored;
    * only the first ``=`` separates name from value, so values may
      themselves contain ``=`` (e.g. base64 padding);
    * when a name is repeated, the last occurrence wins.

    If the header is missing or empty, ``self.COOKIES`` is left untouched.
    """
    cookies_header = self.environ.get("HTTP_COOKIE")
    if not cookies_header:
        return

    cookies: dict[str, str] = {}
    for segment in cookies_header.split(";"):
        # partition() never raises: a segment without "=" yields an empty
        # separator, which covers both blank segments and bare flags.
        name, separator, value = segment.partition("=")
        if not separator:
            continue
        cookies[name.strip()] = value.strip()
    self.COOKIES = cookies
def json(self): def json(self):
return json.loads(self.content) return json.loads(self.content)

View file

@ -147,3 +147,71 @@ def test_setting_multiple_cookies():
app(environ, start_response) app(environ, start_response)
assert start_response.headers[-1] == ("set-cookie", "cookie2=value2") assert start_response.headers[-1] == ("set-cookie", "cookie2=value2")
assert start_response.headers[-2] == ("set-cookie", "cookie1=value1") assert start_response.headers[-2] == ("set-cookie", "cookie1=value1")
import json as _json
@pytest.mark.parametrize(
    "cookie_header,expected",
    [
        pytest.param("", {}, id="empty"),
        pytest.param(" ", {}, id="space-only"),
        pytest.param(";", {}, id="single-semicolon"),
        pytest.param(";; ; ", {}, id="many-empty"),
        pytest.param("a=1", {"a": "1"}, id="single-pair"),
        pytest.param("a=1; b=2", {"a": "1", "b": "2"}, id="two-pairs"),
        # flag-like segment ignored
        pytest.param("a=1; b", {"a": "1"}, id="flag-after"),
        # single flag ignored
        pytest.param("flag", {}, id="single-flag"),
        pytest.param("a=1; flag; c=3", {"a": "1", "c": "3"}, id="mix-flag"),
        # empty value allowed
        pytest.param("a=1; c=", {"a": "1", "c": ""}, id="empty-value"),
        # values may contain '='
        pytest.param(
            "token=abc=def==",
            {"token": "abc=def=="},
            id="value-with-equals",
        ),
        # tolerate spaces around name/value
        pytest.param(" d = q ", {"d": "q"}, id="spaces-around"),
        # empty segments ignored
        pytest.param(
            "a=1; ; ; c=3",
            {"a": "1", "c": "3"},
            id="ignore-empty-segments",
        ),
        # last duplicate wins
        pytest.param("a=1; a=2", {"a": "2"}, id="duplicate-last-wins"),
        # quotes preserved
        pytest.param("q=\"a b c\"", {"q": '"a b c"'}, id="quoted-value"),
        # url-encoded preserved
        pytest.param(
            "u=hello%3Dworld",
            {"u": "hello%3Dworld"},
            id="url-encoded",
        ),
        # empty name retained per current parser
        pytest.param(
            "=novalue; a=1",
            {"": "novalue", "a": "1"},
            id="empty-name",
        ),
        pytest.param(
            "lead=1; ; trail=2;",
            {"lead": "1", "trail": "2"},
            id="lead-trail-with-empties",
        ),
        pytest.param(
            " spaced = value ; another= thing ",
            {"spaced": "value", "another": "thing"},
            id="spaces-around-multi",
        ),
        pytest.param(
            "a=1; b=2; flag; c=; token=abc=def==; d = q ; ;",
            {"a": "1", "b": "2", "c": "", "token": "abc=def==", "d": "q"},
            id="mixed-case-from-original",
        ),
    ],
)
def test_cookie_parsing_tolerates_malformed_segments(cookie_header, expected):
    """Check that ``request.COOKIES`` matches ``expected`` for each raw header.

    A throwaway route echoes ``request.COOKIES`` back as JSON so the parsed
    result can be compared after a full request/response round trip.
    """
    from spiderweb.response import JsonResponse

    app, environ, start_response = setup()

    @app.route("/")
    def index(request):
        return JsonResponse(data=request.COOKIES)

    environ["HTTP_COOKIE"] = cookie_header
    raw_body = app(environ, start_response)[0]
    parsed = _json.loads(raw_body.decode("utf-8"))
    assert parsed == expected