Skip to content

Commit 17c00a3

Browse files
authored
Merge pull request #246 from Val3nt-ML/pdfmarkdownreader_images
Allow passing 'load_kwargs' arguments to the PDFMarkdownReader 'load_data' method
2 parents 266ee82 + 4e599ed commit 17c00a3

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

pymupdf4llm/pymupdf4llm/llama/pdf_markdown_reader.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ def load_data(
6161
for page in doc:
6262
docs.append(
6363
self._process_doc_page(
64-
doc, extra_info, file_path, page.number, hdr_info
64+
doc, extra_info, file_path, page.number, hdr_info, **load_kwargs
6565
)
6666
)
6767
return docs
@@ -76,6 +76,7 @@ def _process_doc_page(
7676
file_path: str,
7777
page_number: int,
7878
hdr_info: IdentifyHeaders,
79+
**load_kwargs: Any,
7980
):
8081
"""Processes a single page of a PDF document."""
8182
extra_info = self._process_doc_meta(
@@ -86,7 +87,9 @@ def _process_doc_page(
8687
extra_info = self.meta_filter(extra_info)
8788

8889
text = to_markdown(
89-
doc, pages=[page_number], hdr_info=hdr_info, write_images=False
90+
doc, pages=[page_number],
91+
hdr_info=hdr_info,
92+
**load_kwargs,
9093
)
9194
return LlamaIndexDocument(text=text, extra_info=extra_info)
9295

0 commit comments

Comments
 (0)