data-prep-toolkit-lang
View on PyPI — Reverse Dependencies (0)
1.0.0a0 | data_prep_toolkit_lang-1.0.0a0-py3-none-any.whl |
Wheel Details
Project: | data-prep-toolkit-lang |
Version: | 1.0.0a0 |
Filename: | data_prep_toolkit_lang-1.0.0a0-py3-none-any.whl |
Download: | [link] |
Size: | 129253 |
MD5: | aae832f030282cc5e27b92f375a52633 |
SHA256: | ba6d68d09ad1481c1e250fd87d080d3867a641b0d5a0535eb7216c68331df1f2 |
Uploaded: | 2024-12-12 15:54:19 +0000 |
dist-info
METADATA · WHEEL · RECORD · top_level.txt
METADATA
WHEEL
Wheel-Version: | 1.0 |
Generator: | setuptools (75.6.0) |
Root-Is-Purelib: | true |
Tag: | py3-none-any |
RECORD
Path | Digest | Size |
---|---|---|
dpk_doc_chunk/__init__.py | sha256=dENM5wNH4Xils3v7SAh-xc5BVR5awJltaqjI-SEJhQU | 25 |
dpk_doc_chunk/chunkers.py | sha256=4CU4n7WyunGYBOZIAxnySztgUsGCgO-OtceFIzT0S-g | 5277 |
dpk_doc_chunk/local.py | sha256=844l6aRdjtljqoAJM7RlP_UdB3M4md8-a2cslajWl98 | 1475 |
dpk_doc_chunk/local_python.py | sha256=eLLEULsv_jeRVfuViM10spWIDS0IGNPMKxtcP7CuDW0 | 2638 |
dpk_doc_chunk/transform.py | sha256=2ddhchis3_q27a12dopn4SboQPRm1brQpGsQnth0KMQ | 11732 |
dpk_doc_chunk/transform_python.py | sha256=vgo4uJLpQRVo54Ne1BSSMZP-GKhSpb4XvzSjNAbUzhA | 2677 |
dpk_doc_chunk/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_doc_chunk/ray/local.py | sha256=f_btcY2hxDbO_9JjDh8tQiE9dCSrxTm3Sh55nDG4TJM | 2067 |
dpk_doc_chunk/ray/s3.py | sha256=GtfETyPL2JBjy59K6z0QDojvQ3co-R6OVSwrbLE9GzU | 2020 |
dpk_doc_chunk/ray/transform.py | sha256=J2-FmoFsLi1DkRZYvbFiUVg_v2Eh31LjTmiaGyZo7zI | 3214 |
dpk_doc_id/__init__.py | sha256=x-q6Vv8zqM58pO0cWX3IMj1cPhRF66d0NY36tD9iWZA | 106 |
dpk_doc_id/local.py | sha256=rQU6iCA8tn4vjF_veIxtI7tiWjxfj_ycO5dzmKNkj9E | 2188 |
dpk_doc_id/local_python.py | sha256=3KtwOXRi6qJtxzRqYRKWEoUK2jFmSb9llgOgtHQRto8 | 2209 |
dpk_doc_id/transform.py | sha256=5PqGPqEjNP8bsxDwg8POsf8oITWK0TerMHi5EgUyJ48 | 6679 |
dpk_doc_id/transform_python.py | sha256=kSpdim9Et9tUck0ICzx5f8Quu4R5IOQNGYA9ukgid9I | 5694 |
dpk_doc_id/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_doc_id/ray/local.py | sha256=HLt7JSSnuC6whFku47V5ikPInWbDQRHQhFwHYijYrwY | 2261 |
dpk_doc_id/ray/s3.py | sha256=CT4H_bbLiVfzWlIvM_phFMgwVF6r_1z5Pby0K6-rekg | 2251 |
dpk_doc_id/ray/transform.py | sha256=_uZKBeVfeUbu7Ws5p06YUZCG9J58QGpbS2oZz_jU_M8 | 5513 |
dpk_doc_id/spark/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_doc_id/spark/local.py | sha256=O2zhDeglaE-jgSIiNnq4zTFwY1ZrDJM3gAxK4RhuTWs | 2118 |
dpk_doc_id/spark/transform.py | sha256=85hWlWR2feDAdDxGL-BrRP5uA1z4PBJzUhiNCv2Kjnk | 7896 |
dpk_doc_quality/__init__.py | sha256=x-q6Vv8zqM58pO0cWX3IMj1cPhRF66d0NY36tD9iWZA | 106 |
dpk_doc_quality/cc_net_prepro.py | sha256=B9lt_2ZFVsORqXHdepNQ3B-wd3CT_9wNrC41Qcedtg0 | 4929 |
dpk_doc_quality/doc_Gopher_statistics.py | sha256=KxZpWjXKX5l9FkQnyLzwDb3fmdG-8awoGbKWnleFtPI | 6530 |
dpk_doc_quality/doc_c4_statistics.py | sha256=r1GPFg0mmQIqtA9Pdt3F3hi3FpwugOP3Cey-aXAjFpg | 6068 |
dpk_doc_quality/local.py | sha256=GYVZIsQqq4ICaJUIh7G37CbDDSUXZCZUIBxuGG2O83M | 1731 |
dpk_doc_quality/local_python.py | sha256=UVeXbnWd3cORbGs3uhVRO5s7zUvrzGUvYZDQAyZ9VE4 | 2646 |
dpk_doc_quality/transform.py | sha256=qVPjaWssgfQl9A1bWNHzc5GgGGvfDUcX0Y8QoRCc-Ok | 10904 |
dpk_doc_quality/transform_python.py | sha256=hM7dOOVE0OWYcoIyyhDamTD63Zbzr78O7t0c5G26sSU | 3315 |
dpk_doc_quality/utils.py | sha256=n8vHmiLLAtO45_7S8lnseH4sUmyPrhBNIPuUEOhCqfI | 2264 |
dpk_doc_quality/ldnoobw/de | sha256=6-RNd3KJc5Dif3vsQS_c3w78KU5NsVf4SHxM2NT6vVU | 534 |
dpk_doc_quality/ldnoobw/en | sha256=r4UezvHV8hLKuhczmxKsOcwv7314x0h29nI3ZE_O6L0 | 3777 |
dpk_doc_quality/ldnoobw/es | sha256=BzM0JhzC58CDOfrv0vGfliPoJAoKwY_iomw2i4l0ls4 | 617 |
dpk_doc_quality/ldnoobw/fr | sha256=eXY6cF4IJWs5OVKFK7Y6qQP0SF4O8C9wc19eUO__ITw | 793 |
dpk_doc_quality/ldnoobw/ja | sha256=-DxNCEPqZHTBe_Fx7dcrYWs69AP4YT8s3hvYQfr28xY | 2339 |
dpk_doc_quality/ldnoobw/pt | sha256=Be9SSwu3-D-HJX9uxNq_cOFri7aTD3afuT3q9gj_GW0 | 629 |
dpk_doc_quality/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_doc_quality/ray/local.py | sha256=nN32AfJN41wE3Trenwz6g1LcoJbzSZJ7-GvUIy6zig8 | 2435 |
dpk_doc_quality/ray/s3.py | sha256=s41OEMKU6tyDXftnLpQFIrkHm_VPoXA9antgbHS7pBQ | 2706 |
dpk_doc_quality/ray/transform.py | sha256=xuJFFAKMkIaNdMN5jEBJGoFRcj9bgkm2G81ffUd5hPM | 1713 |
dpk_hap/__init__.py | sha256=x-q6Vv8zqM58pO0cWX3IMj1cPhRF66d0NY36tD9iWZA | 106 |
dpk_hap/local.py | sha256=OCR7K03RkK_fobGYbIkrknEG1l3YI32_Gc5UPMW-5Jw | 1858 |
dpk_hap/local_python.py | sha256=jGzAL9fyeAoRnoikiZx0M-I3kKGNCgup0eH02r0CLlw | 2021 |
dpk_hap/transform.py | sha256=1R0mQpOTnaQ3RTaeK9pWqlrPHwhUCeAb1bn716QheRo | 7105 |
dpk_hap/transform_python.py | sha256=LnRq43e774V6mDQPwk5TbvGaCJeI7Of-imjgoPt0ZSE | 2411 |
dpk_hap/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_hap/ray/local.py | sha256=9n6rAkd6gHKKURc0XZWtW08noYzBGOE0Q7B7v6HYLz0 | 2033 |
dpk_hap/ray/s3.py | sha256=TDR76lOcfyj0Ty-5msWCvfxnYpiV-RHVwv_QYWGnELY | 2151 |
dpk_hap/ray/transform.py | sha256=s00JUk3oAyBunZMizc_vrSyg7CgH4uK34P6bB23-WKg | 1513 |
dpk_html2parquet/__init__.py | sha256=3KmYJRl2lrHqrmY38_weBGYBphvQ9KDwRVQblLQebe4 | 105 |
dpk_html2parquet/local.py | sha256=IP_n6APkuybtHmeykhYP-QqlOsBnu7VcOa2N1T30LCc | 1592 |
dpk_html2parquet/local_python.py | sha256=FDI5lfc9h-Z-B10asIyYMhoYvDHBU0EWByCFuc4RMl0 | 1950 |
dpk_html2parquet/transform.py | sha256=f6BkeRIDPP99_IOQVYQ3Qb0td3Zo-Z8vat3pFYZtMaE | 10627 |
dpk_html2parquet/transform_python.py | sha256=jSaQQZgcq1AnedN_X5GEJjQlDGH98hjiIdgaX7DWgs4 | 2559 |
dpk_html2parquet/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_html2parquet/ray/local_ray.py | sha256=V3PlLx7hKVBM8ad6tomdOVw0F3uULlIj-lzKXvyRC8o | 2181 |
dpk_html2parquet/ray/s3_ray.py | sha256=UAytiSJ1gQjj7kStr7vhv5aaB5i4xW5OLhRnyUympOs | 2061 |
dpk_html2parquet/ray/transform.py | sha256=WpPtPJMDXgiOAunXb5MkXd9rleGYnnfTgOoHIFZiPQo | 2535 |
dpk_lang_id/lang_models.py | sha256=ZniqCjUwVKiL5DSY--Z7NfFcMEf0cWHUGyR745i9Hl8 | 1891 |
dpk_lang_id/local.py | sha256=y5aSAZXAP8kXLsxjXAWBFqEYsa6i8b0OQFeNOL-lDNk | 1933 |
dpk_lang_id/local_python.py | sha256=vkuY_Ncx3XSNYAyWOIlju6tMYaM_mE-F7ZfOSwR3vCA | 2334 |
dpk_lang_id/nlp.py | sha256=hdqabpC9PCBPJn0OnGP1waMf3Idf2oJukFFcBFl1tXo | 1863 |
dpk_lang_id/transform.py | sha256=tEDGLBlCJDO0TylxtCl7UX5ygoBwCmOXljsQuhhCkzM | 6761 |
dpk_lang_id/transform_python.py | sha256=gftOqpfT7KeXqT0xArKrIjTjFuT1hAhiP0hdwkwOteY | 2634 |
dpk_lang_id/ray/local.py | sha256=h3g_-Gzuf_Z8-w0tl4d6snoup1ghOrUxiqQSfAOSt6I | 2691 |
dpk_lang_id/ray/s3.py | sha256=MHMTXMwE0_5QYsVouphZ4a-NiuJ7mOFf1JBmR3HWihI | 2634 |
dpk_lang_id/ray/transform.py | sha256=ERQBTNT4UxpbpK6BmReyBH5UT9Vvswex0ET6DiY-9Ac | 1751 |
dpk_pdf2parquet/.dockerignore | sha256=3QTY9fGeIxLthVOMrrg2t3LSQW_3TA17yNwpNZG_D2U | 59 |
dpk_pdf2parquet/.gitignore | sha256=a5B6rgjYJDVdI93RQR68DlACWl_wqkNANnTmnoS2Jg0 | 428 |
dpk_pdf2parquet/__init__.py | sha256=dENM5wNH4Xils3v7SAh-xc5BVR5awJltaqjI-SEJhQU | 25 |
dpk_pdf2parquet/local.py | sha256=UqNHMn6Mp02H5fQ5_2EP-DlQgLX2k1jNItIA4mugFrA | 1613 |
dpk_pdf2parquet/local_python.py | sha256=beqmCQ6lmEaIYPiQcWolR9AFeHY7mDGQ09SQ8b55Vx4 | 2270 |
dpk_pdf2parquet/transform.py | sha256=PdWTiBOl-xJVlMBsGsvvc4QVVnQSCzaOSRXjprGU2Ng | 20716 |
dpk_pdf2parquet/transform_python.py | sha256=-vCTPXJn_UBIZyEn5oIzzI5wGHLNBlzw0hUBJD1HB8s | 2561 |
dpk_pdf2parquet/ray/.dockerignore | sha256=3QTY9fGeIxLthVOMrrg2t3LSQW_3TA17yNwpNZG_D2U | 59 |
dpk_pdf2parquet/ray/.gitignore | sha256=a5B6rgjYJDVdI93RQR68DlACWl_wqkNANnTmnoS2Jg0 | 428 |
dpk_pdf2parquet/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_pdf2parquet/ray/local_ray.py | sha256=So5wwsnIsOLHqFav_kprlCF9gyfq_VCvS1uJtLfP86M | 2246 |
dpk_pdf2parquet/ray/s3_ray.py | sha256=IQauqhMHOfg0bPPjHCAT3CQ5qEg4w1lYJZyt70r0l6A | 2170 |
dpk_pdf2parquet/ray/transform.py | sha256=1fH2mjqaAL2e6oSBiHpxeYfJ9Q2zumrdzkyOv8anIX4 | 3920 |
dpk_text_encoder/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_text_encoder/local.py | sha256=TdbVtESTaDavHB7gf0VW97mfCbTzh8dxdcTI_4-gi-E | 1911 |
dpk_text_encoder/local_python.py | sha256=7PnxN0aajmickBOq4LfsxJFPbeik6I4Jr5zmGus4d8o | 1899 |
dpk_text_encoder/transform.py | sha256=pWekXVfQ7Spq3hWiP27hJGnOMaHD2HDwb33kVNKxOFc | 5146 |
dpk_text_encoder/transform_python.py | sha256=-CAj5jc6MI8oS4CI6qakReb1haYJg5OayEQg8Gw2onc | 2684 |
dpk_text_encoder/ray/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
dpk_text_encoder/ray/local.py | sha256=EQaUrkkhU8IXaI3KYdfwHpGUA_PxkOq9h21vUb-XuaU | 2079 |
dpk_text_encoder/ray/s3.py | sha256=vW7CrGFvLZSHeVuc0ypiCKLDw1mnkLTzocPZJrzmSyg | 2028 |
dpk_text_encoder/ray/transform.py | sha256=SRmBgg_r6cg9Yr0mFO5TMNIPWx99Lf-uEPsqO4I2ShE | 2931 |
dpk_tokenization/local.py | sha256=JFtnIVbksslionjnOtHu3GgOqAdEJe0imyg56bUR_fM | 1673 |
dpk_tokenization/local_long_doc.py | sha256=2csRJCDTcxFNErXFri0SS3FPCsu97LQiSPAFzH5yLSM | 2140 |
dpk_tokenization/s3_long_doc.py | sha256=jvwaX-mmDUSoH6uc_AVoBhfvZtBHtDMcI3Z0DWKgZQ8 | 2153 |
dpk_tokenization/transform.py | sha256=j2Xm8hJKLd4uzsihTXvTsSuClBm9KIT1iPn0ondTji4 | 10706 |
dpk_tokenization/transform_python.py | sha256=4GEGbJSAPbAxf7qECJxFjm_jKj0gQ86FmhJn9scMvgE | 2059 |
dpk_tokenization/utils.py | sha256=uJuozmA3hIGQHIFol3w_hT6Dw5J3dMu71or9bK0Pk6c | 5475 |
dpk_tokenization/ray/local.py | sha256=NEj27OdWF0OEsAqqnP5FaFJum6GZs2ViWpowTI_LFb0 | 2000 |
dpk_tokenization/ray/s3.py | sha256=O1o2d4hDzS5BYuxEve7UnyCU9gkIjPZ-7HtsKpmBs7w | 2171 |
dpk_tokenization/ray/transform.py | sha256=xO5nQdc52lOadMxxTXFLflt9s44ZVKsw9DVYJ_6L6ro | 1330 |
dpk_web2parquet/config.py | sha256=8AN85NQziUQ6jButezmNXSFdAdJ41b76a4olTmyx-bI | 3080 |
dpk_web2parquet/local.py | sha256=raW-mvoDGMzP3nP_H4QCwQP0SWat0VYDSPYsgB4naWg | 1147 |
dpk_web2parquet/local_python.py | sha256=YIMvFU89eXGZIPP03UdzwFPR2_S788lOd7iAmk07eR4 | 2018 |
dpk_web2parquet/python_runtime.py | sha256=iX5Io_IrJzvMejP8Wg0KM91fdPmSLdCk0RIeETHFkr0 | 1713 |
dpk_web2parquet/transform.py | sha256=F6JklxecXT2QH-GQcxz4_zjCRY0RTr4JWdnA-zmp6v0 | 4843 |
dpk_web2parquet/utils.py | sha256=E0ZwdxxH3w1bGkcJ2HUjIsMZ38B5Ogyr5fFUM9jSU2w | 1458 |
data_prep_toolkit_lang-1.0.0a0.dist-info/METADATA | sha256=uyYCGGeAXK4ChxQkfYGgQNaHBA5GAbS1NoJFYQH5iWo | 7841 |
data_prep_toolkit_lang-1.0.0a0.dist-info/WHEEL | sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U | 91 |
data_prep_toolkit_lang-1.0.0a0.dist-info/top_level.txt | sha256=8pDowSk_zLZ8C4ls2tOprvY3gr0CKLRncqt-gu4l-DA | 144 |
data_prep_toolkit_lang-1.0.0a0.dist-info/RECORD | — | — |
top_level.txt
dpk_doc_chunk
dpk_doc_id
dpk_doc_quality
dpk_hap
dpk_html2parquet
dpk_lang_id
dpk_pdf2parquet
dpk_text_encoder
dpk_tokenization
dpk_web2parquet