102 lines
2.9 KiB
Python
102 lines
2.9 KiB
Python
import os
|
||
from typing import List
|
||
|
||
from pypdf import PdfReader, PdfWriter
|
||
|
||
|
||
def extract_page_as_pdf(source_path: str, page_number: int, output_path: str) -> str:
    """Extract a single page from a PDF and save it as a new PDF file.

    Args:
        source_path: Path to the original PDF file.
        page_number: 1-indexed page number to extract.
        output_path: Where to save the extracted page PDF. Missing parent
            directories are created.

    Returns:
        The output_path of the saved PDF file.

    Raises:
        FileNotFoundError: If source_path does not exist.
        ValueError: If source is not a valid PDF or page_number is out of range.
    """
    if not os.path.exists(source_path):
        raise FileNotFoundError(f"Source file not found: {source_path}")

    try:
        reader = PdfReader(source_path)
        # Count pages under the same guard: the page tree is resolved on
        # access, so a damaged document may only fail here rather than in the
        # constructor. Either way the caller gets the documented ValueError.
        total = len(reader.pages)
    except Exception as exc:
        raise ValueError(f"Invalid PDF file: {exc}") from exc

    if not 1 <= page_number <= total:
        raise ValueError(
            f"Page number {page_number} out of range (1–{total})"
        )

    writer = PdfWriter()
    writer.add_page(reader.pages[page_number - 1])  # convert to 0-based index

    # dirname() is "" for a bare file name; fall back to the current directory.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    with open(output_path, "wb") as f:
        writer.write(f)

    return output_path
|
||
|
||
|
||
def extract_pages_as_pdf(
    source_path: str,
    page_numbers: List[int],
    output_dir: str,
    filename_stem: str,
) -> List[str]:
    """Extract multiple pages from a PDF, saving each as a separate PDF.

    Naming convention: {filename_stem}_page_{page_number}.pdf

    Args:
        source_path: Path to the original PDF file.
        page_numbers: List of 1-indexed page numbers to extract. An empty
            list returns ``[]`` immediately, before source_path is checked.
        output_dir: Directory to save extracted PDFs (created if missing).
        filename_stem: Base name for output files (e.g. "NEC4 ACC").

    Returns:
        List of output file names, i.e. paths relative to output_dir, in the
        same order as page_numbers.

    Raises:
        FileNotFoundError: If source_path does not exist.
        ValueError: If source is not a valid PDF or any page_number is out of range.
    """
    if not page_numbers:
        return []

    if not os.path.exists(source_path):
        raise FileNotFoundError(f"Source file not found: {source_path}")

    try:
        reader = PdfReader(source_path)
        # Count pages under the same guard: the page tree is resolved on
        # access, so a damaged document may only fail here rather than in the
        # constructor. Either way the caller gets the documented ValueError.
        total = len(reader.pages)
    except Exception as exc:
        raise ValueError(f"Invalid PDF file: {exc}") from exc

    # Validate every requested page before touching the filesystem, so a bad
    # request fails before the output directory or any file is created.
    for pn in page_numbers:
        if not 1 <= pn <= total:
            raise ValueError(
                f"Page number {pn} out of range (1–{total})"
            )

    os.makedirs(output_dir, exist_ok=True)

    output_filenames: List[str] = []
    for pn in page_numbers:
        filename = f"{filename_stem}_page_{pn}.pdf"
        full_path = os.path.join(output_dir, filename)

        # A fresh writer per page keeps each output a one-page document.
        writer = PdfWriter()
        writer.add_page(reader.pages[pn - 1])  # convert to 0-based index
        with open(full_path, "wb") as f:
            writer.write(f)

        output_filenames.append(filename)

    return output_filenames
|