import os
from typing import List

from pypdf import PdfReader, PdfWriter


def _open_pdf(source_path: str) -> PdfReader:
    """Open ``source_path`` with pypdf, applying the module's error contract.

    Raises:
        FileNotFoundError: If source_path does not exist.
        ValueError: If pypdf fails to open the file for any reason.
    """
    if not os.path.exists(source_path):
        raise FileNotFoundError(f"Source file not found: {source_path}")
    try:
        return PdfReader(source_path)
    except Exception as exc:
        # Every failure raised while constructing the reader is reported to
        # callers as ValueError, with the original exception chained.
        raise ValueError(f"Invalid PDF file: {exc}") from exc


def _require_page_in_range(page_number: int, total: int) -> None:
    """Raise ValueError unless ``1 <= page_number <= total``."""
    if page_number < 1 or page_number > total:
        raise ValueError(
            f"Page number {page_number} out of range (1–{total})"
        )


def _single_page_writer(reader: PdfReader, page_number: int) -> PdfWriter:
    """Return a PdfWriter holding only the 1-indexed ``page_number`` of ``reader``."""
    writer = PdfWriter()
    # Callers use 1-indexed page numbers; reader.pages is 0-indexed.
    writer.add_page(reader.pages[page_number - 1])
    return writer


def extract_page_as_pdf(source_path: str, page_number: int, output_path: str) -> str:
    """Extract a single page from a PDF and save as a new PDF file.

    The parent directory of ``output_path`` is created if it does not exist.

    Args:
        source_path: Path to original PDF file.
        page_number: 1-indexed page number to extract.
        output_path: Where to save the extracted page PDF.

    Returns:
        The output_path of the saved PDF file.

    Raises:
        FileNotFoundError: If source_path does not exist.
        ValueError: If source is not a valid PDF or page_number is out of range.
    """
    reader = _open_pdf(source_path)
    _require_page_in_range(page_number, len(reader.pages))

    writer = _single_page_writer(reader, page_number)

    # A bare filename has an empty dirname; fall back to the current directory.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    with open(output_path, "wb") as f:
        writer.write(f)

    return output_path


def extract_pages_as_pdf(
    source_path: str,
    page_numbers: List[int],
    output_dir: str,
    filename_stem: str,
) -> List[str]:
    """Extract multiple pages from a PDF, saving each as a separate PDF.

    Naming convention: {filename_stem}_page_{page_number}.pdf

    All page numbers are validated before anything is written, so an
    out-of-range page number produces no output files. A page number that
    appears more than once is written again to the same file and its name is
    repeated in the result.

    Args:
        source_path: Path to original PDF file.
        page_numbers: List of 1-indexed page numbers to extract.
        output_dir: Directory to save extracted PDFs (created if missing).
        filename_stem: Base name for output files (e.g. "NEC4 ACC").

    Returns:
        List of output file names (relative to output_dir), in the order of
        page_numbers. An empty page_numbers returns an empty list immediately,
        without checking source_path.

    Raises:
        FileNotFoundError: If source_path does not exist.
        ValueError: If source is not a valid PDF or any page_number is out of range.
    """
    if not page_numbers:
        return []

    reader = _open_pdf(source_path)

    # Validate every requested page up front so a bad entry fails before any
    # file or directory is created.
    total = len(reader.pages)
    for pn in page_numbers:
        _require_page_in_range(pn, total)

    os.makedirs(output_dir, exist_ok=True)

    output_filenames: List[str] = []
    for pn in page_numbers:
        filename = f"{filename_stem}_page_{pn}.pdf"
        full_path = os.path.join(output_dir, filename)

        writer = _single_page_writer(reader, pn)
        with open(full_path, "wb") as f:
            writer.write(f)

        output_filenames.append(filename)

    return output_filenames