Enhance README with detailed service description, setup instructions, and example .env configuration for the FastAPI service that integrates with Paperless-ngx and llama.cpp for PDF processing.
This commit is contained in:
28
tests/test_paperless_task_parse.py
Normal file
28
tests/test_paperless_task_parse.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from notebook_tools.paperless_client import _document_id_from_task_payload
|
||||
|
||||
|
||||
def test_related_document_string() -> None:
    """A payload whose ``related_document`` is the string "10" yields the int 10."""
    task_payload = {
        "related_document": "10",
        "result": "Success. New document id 10 created",
    }

    document_id = _document_id_from_task_payload(task_payload)

    assert document_id == 10
|
||||
|
||||
|
||||
def test_result_string_only() -> None:
    """With ``related_document`` set to None, id 42 comes from the result text."""
    task_payload = {
        "related_document": None,
        "result": "Success. New document id 42 created",
    }

    document_id = _document_id_from_task_payload(task_payload)

    assert document_id == 42
|
||||
|
||||
|
||||
def test_related_document_int() -> None:
    """An integer ``related_document`` (and no ``result`` key) is returned as-is."""
    task_payload = {"related_document": 7}

    document_id = _document_id_from_task_payload(task_payload)

    assert document_id == 7
|
||||
73
tests/test_pipeline_smoke.py
Normal file
73
tests/test_pipeline_smoke.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import respx
|
||||
from httpx import Response
|
||||
|
||||
from notebook_tools.pipeline import run_pipeline_for_paperless_document
|
||||
from notebook_tools.settings import Settings
|
||||
|
||||
|
||||
@respx.mock
async def test_pipeline_smoke_single_page(monkeypatch) -> None:
    """Smoke-test the pipeline for a single fake page with all HTTP calls mocked.

    The PDF helpers are stubbed so that exactly one page is produced, the
    Paperless and llama endpoints are served by respx routes, and the PATCH
    sent for the newly created document (id 999) is inspected at the end.
    """
    # Real PDF rendering is deliberately avoided: the helpers are replaced
    # with stubs that yield a controlled "one page" result.
    from notebook_tools import pdf_utils

    def _fake_render(*, pdf_bytes, dpi):
        return [b"jpegbytes"]

    def _fake_jpeg_to_pdf(*, jpeg_bytes):
        return b"%PDF-1.4 fake"

    monkeypatch.setattr(pdf_utils, "render_pdf_to_jpegs", _fake_render)
    monkeypatch.setattr(pdf_utils, "jpeg_to_pdf_bytes", _fake_jpeg_to_pdf)

    # The page-number OCR step crops the JPEG, but the fake bytes above are
    # not a decodable image. Cropping is bypassed so the pipeline relies on
    # its default "-1" page-number fallback.
    import notebook_tools.pipeline as pipeline

    def _fake_crop(*, full_page_jpeg):
        return []

    monkeypatch.setattr(pipeline, "_crop_bottom_corner_jpegs", _fake_crop)

    paperless_url = "https://paperless.local"
    llama_url = "http://llama.local"
    task_uuid = "34de1527-aade-499b-8a06-dd0174c9f233"

    # Paperless: download of the source document and upload of the page.
    respx.get(f"{paperless_url}/api/documents/123/download/").mock(
        return_value=Response(200, content=b"%PDF-1.4 source")
    )
    respx.post(f"{paperless_url}/api/documents/post_document/").mock(
        return_value=Response(200, json=task_uuid)
    )
    # post_document answers with a task id, so the client then polls
    # /api/tasks/?task_id=<uuid> to learn the created document id.
    task_listing = [{"id": 31, "task_id": task_uuid, "related_document": 999}]
    respx.get(f"{paperless_url}/api/tasks/").mock(
        return_value=Response(200, json=task_listing)
    )
    patch_route = respx.patch(f"{paperless_url}/api/documents/999/").mock(
        return_value=Response(200, json={})
    )

    # llama: OCR completion endpoint.
    ocr_reply = {"choices": [{"message": {"content": "OCR TEXT"}}]}
    respx.post(f"{llama_url}/v1/chat/completions").mock(
        return_value=Response(200, json=ocr_reply)
    )

    settings = Settings(
        PAPERLESS_BASE_URL=paperless_url,
        PAPERLESS_TOKEN="t",
        LLAMA_BASE_URL=llama_url,
        LLAMA_MODEL="m",
    )

    result = await run_pipeline_for_paperless_document(
        settings=settings,
        paperless_document_id=123,
        notebook_id="nb1",
        job_id="job1",
        on_progress=None,
        ocr_prompt_override=None,
        title_prefix="Notebook nb1",
    )

    assert result["created_document_ids"] == [999]
    assert patch_route.called

    # Decode the body of the first PATCH request and check every field.
    raw_body = patch_route.calls[0].request.content
    sent_patch = json.loads(raw_body.decode("utf-8"))
    assert sent_patch["content"] == "OCR TEXT"
    assert sent_patch["title"] == "Notebook nb1 Page -1"

    custom_fields = sent_patch["custom_fields"]
    assert custom_fields[0]["field"] == 1
    assert custom_fields[0]["value"] == "nb1"
    assert custom_fields[1]["field"] == 2
    assert custom_fields[1]["value"] == -1

    assert sent_patch["document_type"] == 3
|
||||
|
||||
Reference in New Issue
Block a user