Update loaders: enable PaddleOCR angle classification (use_angle_cls=True) in the image and PDF loaders

14295392 · imClumsyPanda · 871a8716 · 14295392 · 14295392
--- a/loader/image_loader.py
+++ b/loader/image_loader.py
@@ -15,7 +15,7 @@ class UnstructuredPaddleImageLoader(UnstructuredFileLoader):
            if not os.path.exists(full_dir_path):
                os.makedirs(full_dir_path)
            filename = os.path.split(filepath)[-1]
-            ocr = PaddleOCR(lang="ch", use_gpu=False, show_log=False)
+            ocr = PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False, show_log=False)
            result = ocr.ocr(img=filepath)
            ocr_result = [i[1][0] for line in result for i in line]

--- a/loader/pdf_loader.py
+++ b/loader/pdf_loader.py
@@ -15,7 +15,7 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
            full_dir_path = os.path.join(os.path.dirname(filepath), dir_path)
            if not os.path.exists(full_dir_path):
                os.makedirs(full_dir_path)
-            ocr = PaddleOCR(lang="ch", use_gpu=False, show_log=False)
+            ocr = PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False, show_log=False)
            doc = fitz.open(filepath)
            txt_file_path = os.path.join(full_dir_path, f"{os.path.split(filepath)[-1]}.txt")
            img_name = os.path.join(full_dir_path, 'tmp.png')