Source code for skelmis.docx.utility.to_pdf

import json
import logging
import secrets
import shutil
import subprocess
import sys
from pathlib import Path

log = logging.getLogger(__name__)


def _update_toc_linux(docx_file: Path) -> None:
    """TOC bindings for linux"""
    # This method hangs if item is already open, so we cheat a little here
    tmp_file = str(docx_file) + f".{secrets.token_hex(4)}.docx"
    tmp_file = Path(tmp_file)
    shutil.copy(docx_file, tmp_file)

    # Source: https://github.com/python-openxml/python-docx/issues/1207#issuecomment-1924053420
    subprocess.call(
        [
            "libreoffice",
            "--headless",
            f"macro:///Standard.Module1.UpdateTOC({str(tmp_file)})",
        ],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    shutil.copy(tmp_file, docx_file)
    tmp_file.unlink()


def _create_pdf_windows(docx_file: Path) -> None:
    import win32com.client

    word = win32com.client.Dispatch("Word.Application")
    wdFormatPDF = 17

    docx_filepath = docx_file
    pdf_filepath = Path(f"{docx_file.stem}.pdf").absolute().resolve()
    doc = word.Documents.Open(str(docx_filepath))
    try:
        doc.SaveAs(str(pdf_filepath), FileFormat=wdFormatPDF)
    except:
        raise
    finally:
        doc.Close(0)

    word.Quit()


def _create_pdf_linux(docx_file: Path) -> None:
    try:
        subprocess.call(
            [
                "libreoffice",
                "--convert-to",
                "pdf",
                str(docx_file),
            ],
            timeout=5,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired:
        # New versions of LibreOffice appear to
        # hang even after the PDF are created hence
        # why we enforce a process timeout
        log.debug(
            "DOCX to PDF call timed out after 5 seconds. "
            "This is likely fine, but if not please open an issue."
        )


def _create_pdf_macos(docx_file: Path) -> None:
    log.warning("DOCX -> PDF on mac is untested. Any issues please raise an issue.")
    script = (Path(__file__).parent / "convert.jxa").absolute().resolve()
    cmd = [
        "/usr/bin/osascript",
        "-l",
        "JavaScript",
        str(script),
        str(docx_file),
        str(Path(f"{docx_file.stem}.pdf").resolve()),
    ]

    process = subprocess.Popen(cmd, stderr=subprocess.PIPE)
    process.wait()
    if process.returncode != 0:
        msg = process.stderr.read().decode().rstrip()
        if "application can't be found" in msg.lower():
            raise EnvironmentError("Microsoft Word is not available.")
        raise RuntimeError(msg)

    def stderr_results(process):
        while True:
            output_line = process.stderr.readline().rstrip()
            if not output_line:
                break
            yield output_line.decode("utf-8")

    for line in stderr_results(process):
        try:
            msg = json.loads(line)
        except ValueError:
            continue
        if msg["result"] == "error":
            print(msg)
            sys.exit(1)


[docs] def export_libre_macro( macro_folder: Path = Path("~/.config/libreoffice/4/user/basic/Standard"), ) -> None: """Automatically moves the LibreOffice macro file to `macro_folder`. Warning, this overrides Module1.xba :py:class:`Path` is where your macros live """ macro_folder = macro_folder.expanduser() module_file = Path(__file__).parent.absolute().resolve() / "Module1.xba" shutil.copy(module_file, macro_folder)
[docs] def update_toc(docx_file: Path | str) -> None: """Update a TOC within a word document. If you are on linux, please call `export_libre_macro` first. """ if isinstance(docx_file, str): docx_file = Path(docx_file) docx_file = docx_file.absolute().resolve() if sys.platform == "linux": _update_toc_linux(docx_file) elif sys.platform == "win32": raise ValueError("Windows is not yet implemented yet.") else: raise ValueError(f"{sys.platform} is not implemented")
[docs] def document_to_pdf(docx_file: Path | str) -> None: """Create a PDF from a word document. Consider calling the relevant API's yourself if you need to add extra context to calls such as watermark arguments. """ if isinstance(docx_file, str): docx_file = Path(docx_file) docx_file = docx_file.absolute() if sys.platform == "linux": _create_pdf_linux(docx_file) elif sys.platform == "win32": _create_pdf_windows(docx_file) elif sys.platform == "darwin": _create_pdf_macos(docx_file) else: raise ValueError(f"{sys.platform} is not implemented")