docparser-feb

View on PyPI | Reverse Dependencies (0)

0.1.5 docparser_feb-0.1.5-py3-none-any.whl

Wheel Details

Project: docparser-feb
Version: 0.1.5
Filename: docparser_feb-0.1.5-py3-none-any.whl
Download: [link]
Size: 436956
MD5: b6284b97d015439aa2e83faff250fd9a
SHA256: 30b5073ae113a410e93c36da8ae103e13d892f7188cb7a43361896c46c29e76e
Uploaded: 2024-10-09 15:25:35 +0000

dist-info

METADATA

Metadata-Version: 2.1
Name: docparser-feb
Version: 0.1.5
Summary: Document parsing tool for LLM training and Rag
Author-Email: Licheng Wang <244267620[at]qq.com>
Project-Url: Homepage, https://github.com/feb-co/DocParser
License: MIT
Keywords: pdf,LLM,ChatGPT,transformer,pytorch,deep learning
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Requires-Dist: albumentations
Requires-Dist: bs4
Requires-Dist: cn2an
Requires-Dist: cachetools
Requires-Dist: datrie
Requires-Dist: effdet
Requires-Dist: hanziconv
Requires-Dist: html-text
Requires-Dist: lxml
Requires-Dist: layoutparser
Requires-Dist: nougat-ocr
Requires-Dist: nltk
Requires-Dist: opencv-python
Requires-Dist: openpyxl
Requires-Dist: onnxruntime
Requires-Dist: pdfplumber
Requires-Dist: pyclipper
Requires-Dist: PyPDF2
Requires-Dist: python-docx
Requires-Dist: python-pptx
Requires-Dist: ruamel.yaml
Requires-Dist: roman-numbers
Requires-Dist: shapely
Requires-Dist: StrEnum
Requires-Dist: tika
Requires-Dist: tiktoken
Requires-Dist: transformers
Requires-Dist: tokenizers
Requires-Dist: word2number
Requires-Dist: xgboost
Requires-Dist: langdetect
Description-Content-Type: text/markdown
License-File: LICENSE
[Description omitted; length: 2853 characters]

WHEEL

Wheel-Version: 1.0
Generator: setuptools (75.1.0)
Root-Is-Purelib: true
Tag: py3-none-any

RECORD

Path Digest Size
docparser_feb/__init__.py sha256=K5SiDdEGYMpdqXThrqwTqECJJBOQNTQDrnpc2K5mzKs 21
docparser_feb/api/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
docparser_feb/api/functional/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
docparser_feb/api/functional/html_parser.py sha256=y3Bsa7EN7VqtIDPAyTn0JICzb9DSE_jYdYtiWbQ_64s 2930
docparser_feb/api/functional/pdf_parser.py sha256=32FisdE5vHeMLF5Ur03fUzu6K3qmVjQPg_n8bBb9k4o 5288
docparser_feb/api/server/settings.py sha256=cVz-FAz3BNf2q6KBfWUWo9NZiB3MV-vjRjGdl4qU1oE 1202
docparser_feb/scripts/__init__.py sha256=aOZLMUzN_b1JUX01xKEUk8VpTUpldUEkTvrrg6Y3b78 2983
docparser_feb/scripts/file_utils.py sha256=wJBwva1tyoOPNn6KQ1UhR35uju-Ombp5B9ocGVXrv5k 7168
docparser_feb/scripts/log_level.py sha256=b-rJjXelyQE1fs51mJVsJHugceOi9WXhjNDuy-JUNCs 143
docparser_feb/scripts/markdown.py sha256=xKQKNILdTQLITfP2VrD6-RxkNpC_R6TlUEdWYdVhhpY 3825
docparser_feb/scripts/string_utils.py sha256=fOJF2y6Jz553Zp90tSslAqGTl7u6uW7hhmcyw-Al144 407
docparser_feb/scripts/nlp/__init__.py sha256=CGeIUwnAIk6FxtK61TeJIwQuNhpJpFcCF7anAl7uFXo 18304
docparser_feb/scripts/nlp/query.py sha256=PaMrTsTjWkQ6UWoLsM1gB0NcUZWPslaGYjoZwrFTE2A 7487
docparser_feb/scripts/nlp/rag_tokenizer.py sha256=XpbViGQ_1rZo4meqPdQEZb0kSx3RkEe-mFWV6nRNI_U 12081
docparser_feb/scripts/nlp/split_text.py sha256=7OxCjirX2n2Fo6c0xsOySGYUhCrtdSvpHdBhJaSCZBM 1078
docparser_feb/scripts/nlp/surname.py sha256=2hlXbXQ-sa5ZzyC4Nzh-Dc18rIWRdOPFza7eu1d1zqM 4266
docparser_feb/scripts/nlp/synonym.py sha256=uhV6VDzyZcy7ZhcavXdp3hdgrmkq6YpWE24wUTOWsD4 2278
docparser_feb/scripts/nlp/term_weight.py sha256=uTuYybybw4bPgwLsAGpfGmzCee62vhEZTZNBIyUW_bU 7821
docparser_feb/scripts/openai/openai.py sha256=SQwCjYNfv2shLTJOsE9ATnUtRabIa_JEU0-CjUf7G80 2628
docparser_feb/scripts/openai/prompts.py sha256=JTHJTm3US3UAca-kU8cx0cnMQoFVS8pEW05LdPr9csQ 1735
docparser_feb/scripts/postprocess/__init__.py sha256=YXZUrHWuHw9LHdlzlCUk76eJ-5fq7lvLJn1h6yGGayI 109
docparser_feb/scripts/postprocess/html_postprocess.py sha256=o4IXxeVm4YA7V8ShOyemupquCAxm6J9pQZ9Ml0raGmU 2292
docparser_feb/scripts/postprocess/pdf_postprocess.py sha256=OzHd74fBPZCYmyT4f38nzMLoztHe4JKoLBL6kEZ_QXU 5518
docparser_feb/scripts/rendering/pdf_rendering.py sha256=pygi-kQMase3RBP6LfYLnu4Yu0I9Emairioz82XGWqQ 932
docparser_feb/src/__init__.py sha256=7pOU3-5TFvqifMw5tH0WVjeYUqsvlpZpqELjl69jx6E 41
docparser_feb/src/parser/__init__.py sha256=le2EiPCYx3YDj8TK4MD19SSEXlfj8ufE9u4ZzMXPzfA 186
docparser_feb/src/parser/docx_parser.py sha256=Yoct1mRzT7BSXDJl19ZCFWHbjNe5wmsMPylq0r-T3Ag 4446
docparser_feb/src/parser/excel_parser.py sha256=wLTjvrEvkdhKOeSV2jw4C53KAKWp2XKq-TiAsIeiASE 2766
docparser_feb/src/parser/html_parser.py sha256=VWHbPhuaofQHrAwxIR55ZOzVo7t9BNBxx_f06_dJ4YI 2658
docparser_feb/src/parser/pdf_parser.py sha256=IZkIR-59IkgTXiUTTvGh0WEDz0MjNjBCy2HBe6ezLTU 49810
docparser_feb/src/parser/resume/__init__.py sha256=r0UmSVLOjt_3rwOnksoGZBetkx6Y9hyGw-yk01sXfhI 2792
docparser_feb/src/parser/resume/step_one.py sha256=6MftdXYSc0h0-VxNOxeLzslG7KvShw3NT3_1GW73Zk0 6247
docparser_feb/src/parser/resume/step_two.py sha256=-XIwGk21oHpN0p0w1CVow1I1VYT6okVZEBvv0PuQml4 25452
docparser_feb/src/parser/resume/entities/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
docparser_feb/src/parser/resume/entities/corporations.py sha256=mOoVfgJfXboFiP4wpyDuTUnv64R8mtw6UBJaQh56BkI 3506
docparser_feb/src/parser/resume/entities/degrees.py sha256=EpyUUoScrqdzNdLZAwn_psT5o7ThbVA05h-F4RU21-E 940
docparser_feb/src/parser/resume/entities/industries.py sha256=_8i3eeHq_nqkURVHRnqE4FQfIOW8aJVCVsyPO3AWsHA 32145
docparser_feb/src/parser/resume/entities/regions.py sha256=OGbZrWfAlna780JpxGDJ6Cw3r9bnd2M_HDdRunGXXzw 34068
docparser_feb/src/parser/resume/entities/schools.py sha256=8ytPuwn1blcsQ_q0vO7n51fDmZKIdtWo_JNcpfKtr5M 2641
docparser_feb/src/parser/resume/entities/res/corp.tks.freq.json sha256=XiJK2bw5F15HqEg891UOeI1RnILuC2pTxLlTIWTVbU8 1238
docparser_feb/src/parser/resume/entities/res/corp_baike_len.csv sha256=5CtXRJKT9OPNx-54tGq_5w9eSPGndXfCCsolM1QAeFw 372893
docparser_feb/src/parser/resume/entities/res/corp_tag.json sha256=e6Id1H4CpO4WXdQRkaDXW6Zf_pMSZrMqheCXdxIZVPY 288162
docparser_feb/src/parser/resume/entities/res/good_corp.json sha256=f5MN4YDyLLVlMtmxgDZBH9htXw42LEXVQeQSz8aWEZ8 18366
docparser_feb/src/parser/resume/entities/res/good_sch.json sha256=FwgYsTl784yKdflYWnoT3ZQ1eZMP0Ukoj02m-cbHyEY 11026
docparser_feb/src/parser/resume/entities/res/school.rank.csv sha256=uDNVVQ-opTDYz3ndb4R_HthyiDJQOSqJ_J8SIpOGF3I 91923
docparser_feb/src/parser/resume/entities/res/schools.csv sha256=N5MQ5lX0NRYDhpTiKyMXmarXaovMYvUx91H05zwwgOI 487374
docparser_feb/src/vision/__init__.py sha256=SRHamDEbDwFa-m8RdzR-yEAvTzCBo_vxWo52NHOM_sU 2040
docparser_feb/src/vision/layout_recognizer.py sha256=PxFqv0BPoV-tCCEzuk4ANdy8T6M3tKm1emTpr5KGPbg 8409
docparser_feb/src/vision/nougat.py sha256=wIyb3B8SykgbGZakd5lyoFIuSHSh73qUW--NbfZ-pP4 4512
docparser_feb/src/vision/ocr.py sha256=vMKrGlAxZ-H-VFQtxG8nsj4e3ECVMwdeIcv3Q8vh-uI 22479
docparser_feb/src/vision/ocr.res sha256=2RfXSJ0sINLkzujBcGj4rFT27kBqBL9aw7H_XcOOHlI 32871
docparser_feb/src/vision/operators.py sha256=Aw0dU8m8I-9lGQjKfgrEO5hfg2gec4CaBE3CpCj6h9U 24362
docparser_feb/src/vision/postprocess.py sha256=D53ky-elllW44Au9KxXqryLT8baYg0itAZ6Yc6wMLf8 13467
docparser_feb/src/vision/recognizer.py sha256=0w2ykbcrvVCRh8W9qdOXFHsavBfX5q8ciHBTi3U5heE 20390
docparser_feb/src/vision/seeit.py sha256=L3IsLe8WFHt2zX-41mvqeH5ztn1l8ZCOiuU3andXzlc 2899
docparser_feb/src/vision/table_structure_recognizer.py sha256=ce3yFau8aj1msPV2LKF27W3nA90VygSqV-UJU3MNxOU 24119
docparser_feb-0.1.5.dist-info/LICENSE sha256=z5sXgi0fzU_zLMvhQYM4b7Ot9vL_ktwYQTCCP3_CgXM 1087
docparser_feb-0.1.5.dist-info/METADATA sha256=b5BWEod-0zM16nN3fCamIPkZ60nu-EQH68gR6kk8gRI 4602
docparser_feb-0.1.5.dist-info/WHEEL sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ 91
docparser_feb-0.1.5.dist-info/entry_points.txt sha256=hoPcbV7bfIRv30yvUGY8bT4Z8sy-Yeg1ynTFXfhJWM8 142
docparser_feb-0.1.5.dist-info/top_level.txt sha256=7ARo3b4PxwEg0xT95lCXx8qmklu8C7WjQasC25bWbO8 14
docparser_feb-0.1.5.dist-info/RECORD

top_level.txt

docparser_feb

entry_points.txt

docparser-html = docparser_feb.api.functional.html_parser:main
docparser-pdf = docparser_feb.api.functional.pdf_parser:main