Skip to content
Snippets Groups Projects
Verified Commit 9fabb599 authored by Jonathan Weth's avatar Jonathan Weth :keyboard:
Browse files

Use Selenium for generating PDFs using chromium

parent 6340e789
No related branches found
No related tags found
1 merge request: !991 "Add base template for plain prints and optimize PDF generation"
......@@ -30,6 +30,7 @@ RUN apt-get -y update && \
eatmydata apt-get install -y --no-install-recommends \
build-essential \
chromium \
chromium-driver \
curl \
dumb-init \
gettext \
......
import base64
import os
import subprocess # noqa
from datetime import timedelta
from tempfile import TemporaryDirectory
from typing import Optional, Tuple, Union, Callable
from typing import Callable, Optional, Tuple, Union
from urllib.parse import urljoin
from django.conf import settings
......@@ -19,6 +20,7 @@ from django.utils.translation import gettext as _
from celery.result import AsyncResult
from celery_progress.backend import ProgressRecorder
from selenium import webdriver
from aleksis.core.celery import app
from aleksis.core.models import PDFFile
......@@ -26,6 +28,29 @@ from aleksis.core.util.celery_progress import recorded_task, render_progress_pag
from aleksis.core.util.core_helpers import process_custom_context_processors
def _generate_pdf_with_chromium(temp_dir, pdf_path, html_url, lang):
    """Generate a PDF file from a HTML file.

    The page at ``html_url`` is opened in a headless Chromium driven by
    Selenium and printed through the DevTools ``Page.printToPDF`` command.
    The decoded result is written to ``pdf_path``.

    :param temp_dir: Accepted for the caller's convenience; currently unused
        in this function.
    :param pdf_path: Path of the PDF file to write.
    :param html_url: URL of the HTML page to print.
    :param lang: Language code handed to Chromium via ``--lang``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--kiosk-printing")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-setuid-sandbox")
    chrome_options.add_argument("--dbus-stub")
    chrome_options.add_argument("--temp-profile")
    chrome_options.add_argument(f"--lang={lang}")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(html_url)
        pdf = driver.execute_cdp_cmd(
            "Page.printToPDF", {"printBackground": True, "preferCSSPageSize": True}
        )
    finally:
        # Always end the WebDriver session, even if loading or printing
        # fails; otherwise the browser and driver processes would be left
        # running. quit() is used instead of close() because close() only
        # closes the current window.
        driver.quit()

    # The command result carries the PDF as base64 text in its "data" field
    with open(pdf_path, "wb") as f:
        f.write(base64.b64decode(pdf["data"]))
@recorded_task
def generate_pdf(
file_pk: int, html_url: str, recorder: ProgressRecorder, lang: Optional[str] = None
......@@ -40,26 +65,7 @@ def generate_pdf(
pdf_path = os.path.join(temp_dir, "print.pdf")
lang = lang or get_language()
# Run PDF generation using a headless Chromium
cmd = [
"chromium",
"--headless",
"--no-sandbox",
"--run-all-compositor-stages-before-draw",
"--temp-profile",
"--disable-dev-shm-usage",
"--disable-gpu",
"--disable-setuid-sandbox",
"--dbus-stub",
f"--home-dir={temp_dir}",
f"--lang={lang}",
f"--print-to-pdf={pdf_path}",
html_url,
]
res = subprocess.run(cmd) # noqa
# Let the task fail on a non-success return code
res.check_returncode()
_generate_pdf_with_chromium(temp_dir, pdf_path, html_url, lang)
# Upload PDF file to media storage
with open(pdf_path, "rb") as f:
......@@ -80,13 +86,12 @@ def process_context_for_pdf(context: Optional[dict] = None, request: Optional[Ht
processed_context = context
return processed_context
def generate_pdf_from_html(
html: str, request: Optional[HttpRequest] = None) -> Tuple[PDFFile, AsyncResult]:
html: str, request: Optional[HttpRequest] = None
) -> Tuple[PDFFile, AsyncResult]:
"""Start a PDF generation task and return the matching file object and Celery result."""
file_object = PDFFile.objects.create(
html_file=ContentFile(html.encode(), name="source.html")
)
file_object = PDFFile.objects.create(html_file=ContentFile(html.encode(), name="source.html"))
# As this method may be run in background and there is no request available,
# we have to use a predefined URL from settings then
......@@ -99,8 +104,12 @@ def generate_pdf_from_html(
return file_object, result
def generate_pdf_from_template(
template_name: str, context: Optional[dict] = None, request: Optional[HttpRequest] = None, render_method: Optional[Callable] = None
template_name: str,
context: Optional[dict] = None,
request: Optional[HttpRequest] = None,
render_method: Optional[Callable] = None,
) -> Tuple[PDFFile, AsyncResult]:
"""Start a PDF generation task and return the matching file object and Celery result."""
processed_context = process_context_for_pdf(context, request)
......
......@@ -53,6 +53,7 @@ Install some packages from the Debian package system.
yarnpkg \
python3-virtualenv \
chromium \
chromium-driver \
redis-server \
postgresql \
locales-all \
......
......@@ -47,7 +47,7 @@ Install native dependencies
Some system libraries are required to install AlekSIS. On Debian, for example, they can be installed with::
sudo apt install build-essential libpq-dev libpq5 libssl-dev python3-dev python3-pip python3-venv yarnpkg gettext chromium
sudo apt install build-essential libpq-dev libpq5 libssl-dev python3-dev python3-pip python3-venv yarnpkg gettext chromium chromium-driver
Get Poetry
----------
......
......@@ -130,6 +130,7 @@ django-iconify = "^0.3"
customidenticon = "^0.1.5"
graphene-django = "^2.15.0"
django-webpack-loader = "^1.6.0"
selenium = "^4.4.3"
[tool.poetry.extras]
ldap = ["django-auth-ldap"]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment