Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Builds the documentation site with Great Docs on every push to `main` and on
# every pull request. Builds from `main` are published to GitHub Pages; pull
# requests get a GitHub Deployment record named `pr-<number>`.
name: CI Docs

on:
  push:
    branches:
      - main
  pull_request:

# Least-privilege default for jobs that do not declare their own permissions
# (only build-docs; it just needs to check out the repository).
permissions:
  contents: read

jobs:
  build-docs:
    name: "Build Docs"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0  # Full history for accurate page timestamps

      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      - name: Install package and dependencies
        run: |
          python -m pip install uv
          uv sync --all-extras
          uv pip install great-docs

      - name: Set up Quarto
        uses: quarto-dev/quarto-actions/setup@v2

      - name: Build docs
        run: uv run great-docs build

      # The rendered site is handed to the publish/preview jobs as an artifact.
      - name: Save docs artifact
        uses: actions/upload-artifact@v7
        with:
          name: docs-html
          path: great-docs/_site
          include-hidden-files: true

      - name: Upload build timings
        uses: actions/upload-artifact@v7
        with:
          name: build-timings
          path: great-docs/_site/build-timings.json

  # Runs only for the `main` branch: deploys the built site to GitHub Pages.
  publish-docs:
    name: "Publish Docs"
    runs-on: ubuntu-latest
    needs: "build-docs"
    if: github.ref == 'refs/heads/main'
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - uses: actions/download-artifact@v7
        with:
          name: docs-html
          path: great-docs/_site

      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v5
        with:
          path: great-docs/_site
          include-hidden-files: true

      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v5

  # Runs only for pull requests: records a GitHub Deployment for the PR.
  preview-docs:
    name: "Preview Docs"
    runs-on: ubuntu-latest
    needs: "build-docs"
    if: github.event_name == 'pull_request'
    permissions:
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/download-artifact@v7
        with:
          name: docs-html
          path: great-docs/_site

      # The job-level `if` guarantees a pull_request event, so the release
      # name is always derived from the PR number. The number is passed through
      # an environment variable rather than interpolated into the script.
      - name: Configure pull release name
        env:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo "RELEASE_NAME=pr-${PR_NUMBER}" >> "$GITHUB_ENV"

      # Skipped for forks, whose GITHUB_TOKEN cannot create deployments.
      - name: Create Github Deployment
        uses: bobheadxi/deployments@v1
        id: deployment
        if: ${{ !github.event.pull_request.head.repo.fork }}
        with:
          step: start
          token: ${{ secrets.GITHUB_TOKEN }}
          env: ${{ env.RELEASE_NAME }}
          ref: ${{ github.head_ref }}
          logs: "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"

      # NOTE(review): no step in this job uploads great-docs/_site to a preview
      # host. When one is added, place it above this step and pass its address
      # as `env_url` below.

      # Close the deployment opened above so it does not stay "in progress".
      # Runs even if an earlier step failed, but only when a deployment was
      # actually created (i.e. not for forks).
      - name: Finish Github Deployment
        uses: bobheadxi/deployments@v1
        if: ${{ always() && steps.deployment.outputs.deployment_id != '' }}
        with:
          step: finish
          token: ${{ secrets.GITHUB_TOKEN }}
          status: ${{ job.status }}
          env: ${{ steps.deployment.outputs.env }}
          deployment_id: ${{ steps.deployment.outputs.deployment_id }}
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,5 @@ dmypy.json
.vscode/settings.json
tmp.py

# Added by Sam Edwardes
site
# Great Docs build output (ephemeral)
great-docs/
15 changes: 12 additions & 3 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,23 @@ just lint

### Documentation

The docs site is built with [Great Docs](https://posit-dev.github.io/great-docs/)
(a Quarto-based generator). Great Docs requires Python 3.11+ and the Quarto CLI, and
is run via `uvx` so it stays separate from the package's own dependencies. Content lives
in `great-docs.yml`, the `user_guide/` directory, and the package docstrings. The
landing page is generated automatically from `README.md`.

```bash
# Preview docs locally
# Preview docs locally (http://localhost:3000)
just preview-docs

# Publish docs to GitHub Pages
just publish-docs
# Build the static site into great-docs/_site
just build-docs
```

Docs are published to GitHub Pages automatically by the `CI Docs` workflow
(`.github/workflows/docs.yml`) on every push to `main` — there is no manual publish step.

### Building and Publishing

```bash
Expand Down
5 changes: 0 additions & 5 deletions docs/api/spacypdfreader.parsers.md

This file was deleted.

3 changes: 0 additions & 3 deletions docs/api/spacypdfreader.spacypdfreader.md

This file was deleted.

55 changes: 0 additions & 55 deletions docs/contributing.md

This file was deleted.

5 changes: 0 additions & 5 deletions docs/hooks.py

This file was deleted.

3 changes: 0 additions & 3 deletions docs/index.md

This file was deleted.

73 changes: 73 additions & 0 deletions great-docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Great Docs configuration for the spacypdfreader documentation site.
# Option reference:
# https://posit-dev.github.io/great-docs/user-guide/configuration.html

# --- Display name ---
display_name: spacypdfreader

# --- Docstring parser ---
# Docstring format used by the package: numpy, google, or sphinx.
parser: google

# --- Dynamic introspection ---
# Introspect the package at runtime for more accurate docs (default: true).
dynamic: true

# --- Logo & favicon ---
logo: assets/user-astronaut-solid.svg

# --- GitHub integration ---
# Link style: "widget" (shows the stars count) or "icon" (simple icon).
github_style: widget

# --- Site URL ---
# Canonical address of the deployed documentation site.
site_url: 'https://samedwardes.github.io/spacypdfreader/'

# --- Author information ---
authors:
  - name: Sam Edwardes
    role: Maintainer
    email: edwardes.s@gmail.com
    github: SamEdwardes
    homepage: https://www.linkedin.com/in/samedwardes

# --- User guide ---
# Ordering of the .qmd pages from user_guide/, grouped into sections.
user_guide:
  - section: 'Guides'
    contents:
      - parsers.qmd
      - multiprocessing.qmd
      - spacy-extensions.qmd
  - section: 'Project'
    contents:
      - changelog.qmd
      - contributing.qmd

# --- Jupyter kernel ---
jupyter: python3

# --- API reference structure ---
reference:
  - title: Functions
    desc: 'The main entry point for converting a PDF into a spaCy Doc.'
    contents:
      - spacypdfreader.pdf_reader

  - title: Parsers
    desc: >-
      Built-in PDF-to-text parsers.
      Pass one of these to the `pdf_parser` argument of `pdf_reader`,
      or bring your own.
    contents:
      - parsers.pdfminer.parser
      - parsers.pytesseract.parser
13 changes: 8 additions & 5 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,18 @@ test-matrix:
test-gha:
gh workflow run pytest.yml --ref $(git branch --show-current)

# great-docs requires Python 3.11+ and is run via uvx, separate from the
# package's own dependencies (which support Python 3.9+). The pytesseract
# extras are included so the API reference can introspect every parser module.
docs_cmd := "uvx --python 3.12 --with-editable . --with pdf2image --with pillow --with pytesseract --with great-docs great-docs"

[group('docs')]
preview-docs:
uv run mkdocs serve
{{docs_cmd}} preview

[group('docs')]
publish-docs:
rm -rf site
uv run mkdocs build
uv run mkdocs gh-deploy
build-docs:
{{docs_cmd}} build

[group('docs')]
test-docs:
Expand Down
Loading
Loading