llm-datasets

View on PyPI | Reverse Dependencies (1)

0.0.3 llm_datasets-0.0.3-py3-none-any.whl

Wheel Details

Project: llm-datasets
Version: 0.0.3
Filename: llm_datasets-0.0.3-py3-none-any.whl
Download: [link]
Size: 219938
MD5: ef7c63ecae2ce48b93e928d7e9571c1f
SHA256: e10db0611bcecc28c411bc7768afb8948f61260364cb75255dedd37c1d8e1ec8
Uploaded: 2024-03-26 15:00:23 +0000

dist-info

METADATA

Metadata-Version: 2.1
Name: llm-datasets
Version: 0.0.3
Summary: A collection of datasets for language model training including scripts for downloading, preprocessing, and sampling.
Author-Email: Malte Ostendorff <malte.ostendorff[at]dfki.de>
Project-Url: Documentation, https://github.com/malteos/llm-datasets/tree/main#readme
Project-Url: Source, https://github.com/malteos/llm-datasets
Project-Url: Tracker, https://github.com/malteos/llm-datasets/issues
License: Apache 2.0
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Requires-Python: >=3.9
Requires-Dist: requests
Requires-Dist: pandas
Requires-Dist: datasets
Requires-Dist: smart-open
Requires-Dist: wget
Requires-Dist: zstandard
Requires-Dist: polars
Requires-Dist: pyarrow (>=14.0.1)
Requires-Dist: dill
Requires-Dist: llm-datasets[datasets]; extra == "all"
Requires-Dist: llm-datasets[dev]; extra == "all"
Requires-Dist: llm-datasets[docs]; extra == "all"
Requires-Dist: llm-datasets[viewer]; extra == "all"
Requires-Dist: wget (==3.2); extra == "datasets"
Requires-Dist: mwparserfromhell (>=0.6.4); extra == "datasets"
Requires-Dist: gensim (>=4.3.0); extra == "datasets"
Requires-Dist: zstandard (>=0.20.0); extra == "datasets"
Requires-Dist: treelib (>=1.6.4); extra == "datasets"
Requires-Dist: conllu (>=4.5.3); extra == "datasets"
Requires-Dist: prevert (>=1.0.2); extra == "datasets"
Requires-Dist: translate-toolkit (>=3.9.1); extra == "datasets"
Requires-Dist: folia (>=2.5.8); extra == "datasets"
Requires-Dist: estnltk (>=1.7.1); extra == "datasets"
Requires-Dist: Jinja2; extra == "datasets"
Requires-Dist: pre-commit; extra == "dev"
Requires-Dist: black; extra == "dev"
Requires-Dist: flake8; extra == "dev"
Requires-Dist: flake8-pyproject; extra == "dev"
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-cov; extra == "dev"
Requires-Dist: pytest-runner; extra == "dev"
Requires-Dist: coverage; extra == "dev"
Requires-Dist: pylint; extra == "dev"
Requires-Dist: twine; extra == "dev"
Requires-Dist: jupyter; extra == "dev"
Requires-Dist: mkdocs; extra == "docs"
Requires-Dist: mkdocs-material; extra == "docs"
Requires-Dist: mkdocs-exclude; extra == "docs"
Requires-Dist: mkdocstrings; extra == "docs"
Requires-Dist: mkdocstrings[python]; extra == "docs"
Requires-Dist: streamlit; extra == "viewer"
Requires-Dist: ngrok; extra == "viewer"
Provides-Extra: all
Provides-Extra: datasets
Provides-Extra: dev
Provides-Extra: docs
Provides-Extra: viewer
Description-Content-Type: text/markdown
License-File: LICENSE
[Description omitted; length: 8465 characters]

WHEEL

Wheel-Version: 1.0
Generator: bdist_wheel (0.43.0)
Root-Is-Purelib: true
Tag: py3-none-any

RECORD

Path Digest Size
llm_datasets/__init__.py sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c 22
llm_datasets/__main__.py sha256=xUwnDRwIB95TGd8yh-K-hE98bDhqG_fvTzNVOVeWicg 1803
llm_datasets/chunkify_datasets.py sha256=xiyXrjwg-YS7GJojt8xrQGCuiYcjVt2L21ihJj7MJg0 7312
llm_datasets/collect_metrics.py sha256=dLB7tqQj9ZdONEe0_UwJpWk3H2D34MfQsLtrb6yGSnI 3923
llm_datasets/compose_dataset.py sha256=1X2N0npta3mRFgb9dksvLWPRZ-79cM3WrEN3eeEMHuY 3924
llm_datasets/convert_parquet_to_jsonl.py sha256=TA70gmV_ofAJPyO_eisbpdJGbvy7WhA4QTFYP8RJZME 1064
llm_datasets/extract_text.py sha256=ichfKR0SLn9ycbBVAHibkN2I-yKX9lYvosm6Gepk5Q4 2982
llm_datasets/hf_tokenize_parquet_dataset.py sha256=H4d9oJWCRn5QPM0OqPUV2F7tnovydoac2dWsNtneb2M 16552
llm_datasets/megatron_tokenize_parquet_dataset.py sha256=vsps8up5Uia8_KVUg-dAGhs6hQ6Lop-m_rfbCMHikeY 19243
llm_datasets/print_stats.py sha256=BQ6a4YD_Kbp5Ns2V-FGsvx-ASG9ky252KXEDHjcC5VE 1769
llm_datasets/shuffle_datasets.py sha256=TNj1gdQ2VzGXfxXYmDyOii50gSiVGSvjJTsykKcYpMo 5391
llm_datasets/train_sp_tokenizer.py sha256=8FVuYZsg3BYd-P63Z8PEEdpEAb4mW46fIwsJFO7l8cU 9265
llm_datasets/commands/__init__.py sha256=VJXHX2OQhgQiBt80K2pXOW5sq2AimOAoMUwIjcKQ8U0 2412
llm_datasets/commands/chunkify_command.py sha256=hkfErBjYqU2IQFuNWmRoIV-qK77Y-QrWZmjRpYupvMI 2474
llm_datasets/commands/collect_metrics_command.py sha256=OPHxlo7Wamgrz_D89qNwNDJjJTNruYxvZeHGS7oM70E 2825
llm_datasets/commands/compose_command.py sha256=eA9UNjssX7KI38z7zMKgjQTF8hFbQwVTIiZABdhh2CA 3486
llm_datasets/commands/convert_parquet_to_jsonl_command.py sha256=utKi3DQ7vfC-5YuYHXKNh1EyO-gzyvVQdb01clgaS0A 1541
llm_datasets/commands/extract_text_command.py sha256=Yc90iMec5bcW_lV1JqMnhR7tycpHjAFFi_uVnhr7S2E 3642
llm_datasets/commands/hf_upload_command.py sha256=0wbsIo4zgshT11dcVY_9r6voMG4WNYCeIXrwxcOuxVw 4354
llm_datasets/commands/print_stats_command.py sha256=GyO5AIaqrWaadSjI6dbeRHXszyh7bt5anaSo-cJxOKk 3186
llm_datasets/commands/render_docs_command.py sha256=ZJYbwjslao6CkKqM42zuCgByo19txrPk5DUBpcEC7vM 10883
llm_datasets/commands/shuffle_command.py sha256=CX_H1A0GbVq90tlEyYJfoP2kxmgu-XBIlrX3jKsDFTU 2951
llm_datasets/commands/train_tokenizer_command.py sha256=7UBQ3kXUfcpaJ08M-QlB833BbD-ItkpDTDHKDmIRL5c 3593
llm_datasets/datasets/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/base.py sha256=xiOm0RjNwIA8WkyirkZZ7yI_PD_RPKtVI02YZ0MV0WQ 39491
llm_datasets/datasets/dataset_registry.py sha256=m7HLU2iBSvqifpfWrx-aBwR7IVsbfJH-dJqba3_fE2I 9262
llm_datasets/datasets/hf_dataset.py sha256=gzWqzl_957Gsgov68XAtVjE1KE9Q5EclD7Zev2QEMSs 4969
llm_datasets/datasets/jsonl_dataset.py sha256=7gqQ8JuHig-JocBubbiHgljuVt0yDazJIrRP1dcPOQo 3551
llm_datasets/datasets/parquet_dataset.py sha256=1GAIn5fsZPXeovWckXcmYZbL2LcdBp4HKGxswFxZAWk 1502
llm_datasets/datasets/bg/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/bg/bgnc_admin_eur.py sha256=Tpm9P7Wdm364Q22E5g5SRm05d2JbSbP3N6RrMSWdV0U 1804
llm_datasets/datasets/bg/bgnc_news_corpus.py sha256=Z2wlQtFdc8n5ZQ-iqQ6W9-eDOMak4oVOD_hriHSp40g 1780
llm_datasets/datasets/bg/bulgarian_news.py sha256=RsfCpMkY3CjuAS7OtKP8LndIS1AXD-7kmS5IpEtfZx0 1482
llm_datasets/datasets/bg/bulnc.py sha256=Pv3KBEd056bmaWEmC3L1-c5YbQEusXywdLejc8b2oKs 2100
llm_datasets/datasets/code/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/code/starcoder.py sha256=nQ28YGx0VWEabpD2pXhaxHaLcLjwo1PoFHo9amUGRYc 8838
llm_datasets/datasets/cs/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/cs/cs_en_parallel.py sha256=EooMIb6boJNshUI2nkhh00jAERx4BYzVkxRJJDaSMFw 3616
llm_datasets/datasets/cs/syn_v9.py sha256=Distm_U62sM9jTMXfZLCuc_93LB8d-iqKUk00sTiRTk 4543
llm_datasets/datasets/da/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/da/danewsroom.py sha256=bmIjLTrggganTREnoXL9wuzUAvS1BtfSjs_mb7dUXdI 2968
llm_datasets/datasets/da/danish_gigaword.py sha256=65ShOdIWgDnd-NrKSURe9wPaR6e3VbK61k2FSuhnsrc 2936
llm_datasets/datasets/da/danish_parliament_corpus.py sha256=dj707HIbGWacP5lUTKsAsyrHp09yI-Ned5TL_IZRwOY 2190
llm_datasets/datasets/da/dk_clarin.py sha256=Aw0FYxLpUQgizd27gH4KFurPSnFfS_h-F4CU2eHLs1U 3783
llm_datasets/datasets/de/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/de/de_laws.py sha256=CF9pTeJW9nmoEjQjPKCTlPN2WKQWvpIh7jF7copcDLE 17068
llm_datasets/datasets/de/dewac.py sha256=Ie4L0rKIQqBB-stSUANFouljH5eeqqyDIkDwe3zK1H4 1656
llm_datasets/datasets/de/openlegaldata.py sha256=TwCdzcNPEdbURfCWX1Jf1xJ1QNv5XYmdlS2Ztfpp53A 3080
llm_datasets/datasets/el/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/el/greek_legal_code.py sha256=2cPTGC8_xTR136vekasGFc9Is60ivt7ynf6q85jVJXM 1778
llm_datasets/datasets/el/greek_web_corpus.py sha256=h_GRSPrCkLXw9uPx1skVme4n7aGQfkPXTAU5vzp2BrM 1563
llm_datasets/datasets/en/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/en/dialogstudio.py sha256=In9XdjqGAxZLxoFqbAe1WechUgR73kW11KdXgEj_H-o 3375
llm_datasets/datasets/en/edgar.py sha256=jII_-pE2M2Vm1lAJuA_CKcf0NCzD9QsNrUt5X5qL1OU 2543
llm_datasets/datasets/en/math_amps.py sha256=_XUQGhm3o9sWNKssRSKmf75KZz7unh6Ah888PAhLXKY 3230
llm_datasets/datasets/en/pes2o.py sha256=0ts__4QdzmFutLD4l5VnMYJTihuto4dGXklsnpBJeh8 1454
llm_datasets/datasets/en/pile_of_law.py sha256=l9bVizGrWRIHd6WkDox12DjAXjcu52iiHIUXWJRVgqg 7127
llm_datasets/datasets/en/proof_pile.py sha256=_zg0elpwCsiQmINyYTQ6G-Y6oUbYWBs-g3ouBveQaKM 995
llm_datasets/datasets/en/wikihow.py sha256=oyPP70mylRgIc0c3nq0KOMjOZaWwjfVTBwgrUstTfgw 1366
llm_datasets/datasets/es/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/es/escorpius.py sha256=0yJ4n2RgnxR35nD1-uTNYckzpmW-nu1ycMvhYEhrobM 1039
llm_datasets/datasets/es/spanish_legal.py sha256=D_Q8T-BB4hEI2xiFVPNzGrFYEFZLVTsGp9wW-svflsE 2371
llm_datasets/datasets/et/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/et/ekspress.py sha256=FbSl_ZiwkP99iqB4dftEfWb-4-otW5l3O9jQSW4va4U 4078
llm_datasets/datasets/et/enc.py sha256=t3zMOrxHCHCH8nPUQr8Cey7qCU8gHBHHO0oVm6PMSTQ 3128
llm_datasets/datasets/et/estonian_reference_corpus.py sha256=2-JqKp-2ofDr6hdJYJldK8qGL_EPYmEmTfEGMns0PsY 5058
llm_datasets/datasets/eu/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/eu/euscrawl.py sha256=Gq3BoXHJo-4WeJGkFAMJNOyx4puvtJwoLwshYSI6Q-k 1903
llm_datasets/datasets/fi/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/fi/ylenews.py sha256=QIQkD0OmFW_buf0dvcvw16kdyFDbqarmB-a3wvfRu0U 2417
llm_datasets/datasets/fr/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/fr/cabernet.py sha256=Gsynh_W6R8LcC3YCXlozXaUD7p0kEFKP-SkIeH-FcGI 4344
llm_datasets/datasets/fr/pleiasbooks.py sha256=QmRfulCunJBGt98bDk4jTHK8ogRLrbupgUoWY1Jco8g 2566
llm_datasets/datasets/fr/pleiasnews.py sha256=ueO6ODUwrE_3ZHS99oSGLeV5v9ZEGgLBbbkAyWVqwPY 2850
llm_datasets/datasets/ga/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/ga/ga_bilingual_legistation.py sha256=mAay3U700FMQoFimcmFt6Z83dmZUwq6KwKVu4ME8-BI 1376
llm_datasets/datasets/ga/ga_universal_dependencies.py sha256=70vKX2-Q33eX77Vrv2zWxbBRc0e-dve5FUsvTh3GR5w 2418
llm_datasets/datasets/hr/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/hr/croatian_news_engri.py sha256=a4HpQVHRvfXVUSc9UwN1YeHkl9z4p4ZshQoGvWOr7ls 2760
llm_datasets/datasets/hr/hrwac.py sha256=nRh1gMLrNCnp-yxO_iBOwfIzhQ7hSbHS631gU3roWx0 5283
llm_datasets/datasets/hr/styria_news.py sha256=hbwaDs51GxckjpDcogp4UmFoKZynKIo7SNANfm6oyWY 2393
llm_datasets/datasets/it/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/it/itwac.py sha256=w8Gm_780S_CYLHVGxOB5LmQdx6xKDcxKuxpgp83UOAI 1638
llm_datasets/datasets/lt/seimas_lt_en.py sha256=cyiBlzQd8G2AjimRg4HRBNKKVWVULcYDUywuhYqOAyM 1389
llm_datasets/datasets/lv/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/lv/state_related_latvian_web.py sha256=M6qLdSB6-mie_DE0KvSpzb8DBwFZcqqXF8bhwhdTKL4 1365
llm_datasets/datasets/mt/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/mt/korpus_malti.py sha256=lewxOjxSHmdMQ5Elr1RdflqVV0Pe9orfl8k7gI2qHnY 3220
llm_datasets/datasets/multilingual/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/multilingual/colossal_oscar.py sha256=98Cit7YjIS4aqj1vrY1Rt8FWU-JowkeNpvu1dFvwcWQ 11198
llm_datasets/datasets/multilingual/curlicat.py sha256=P1SoRsIWGiarXg58o4Ds9k_02DWvGHXL-Jg2Jcl55v4 10521
llm_datasets/datasets/multilingual/eurlex.py sha256=9WR9TiTRg9zi3nPR6wy8HGMDh6GWRXLn_hfNTffz75A 2164
llm_datasets/datasets/multilingual/legal_mc4.py sha256=npmnuursePAQMXWKxsE42Sx8cUZLk7ZJzCqFjtikly8 2032
llm_datasets/datasets/multilingual/macocu.py sha256=1RNoaHidLvB1XSPuvCHGHt4uWGev-YgJQ6JnIr0T8so 9358
llm_datasets/datasets/multilingual/redpajama.py sha256=_rbeGpUiZTXHKYwgUao8TOsOwjF5cV3QkTviAtCg848 3544
llm_datasets/datasets/multilingual/wikimedia.py sha256=KQ4IgLtQhHKNExsn3XmeEk1sCPJBghyaO0cHOHbwgm8 10823
llm_datasets/datasets/multilingual/wura.py sha256=XvCKCcFr4B2w6AQfRh8-hCKGr3sIC5bQzIbawO21RUM 6562
llm_datasets/datasets/nl/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/nl/sonar.py sha256=hDh8PD1fADqp_sEVICui7Y6jNhHCAprfwhp0GkOxfgM 6270
llm_datasets/datasets/nl/sonar_new_media.py sha256=_KBbXwa9TO1yOfZ02IeL6QOgYiY6Osr1_LhEvwP9_Zs 2079
llm_datasets/datasets/no/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/no/maalfrid_2021.py sha256=UEMCTyo-07baoMWPJLYeij7BgV_XDS9sTh1RESw7pbM 1306
llm_datasets/datasets/no/nak.py sha256=UZ9YVGx6hHaREGO7Hfv3QnRYqmtZ0p0pTthxESsFpzY 2721
llm_datasets/datasets/no/nbdigital.py sha256=lWoUh4AeUq0zJkI3-xwY746r10-ZhNw9G2MZ1rZv4Oc 1039
llm_datasets/datasets/no/norwegian_cc.py sha256=Y50fCW0HNCjeWusR8NtU7lywA0SkFSbXyr67dS7qPrg 2409
llm_datasets/datasets/no/parlamint.py sha256=YZQTStN8hyJma319ROIhZICZs9NUH40tcTfovUOPQvs 866
llm_datasets/datasets/no/parliamentary_proceedings.py sha256=8k_-F-MOGleACE079zhBYnv2nEyuijQm_RG4BQL5qxs 907
llm_datasets/datasets/no/sakspapir_nno.py sha256=d00QrusddI3KZ6VEDERbeav-duvKau96A2RTWCR9-rw 1050
llm_datasets/datasets/pl/luna_pl.py sha256=9yz2tRCoUhmvLXqX36svCBK8LtnRtteMvN17qSIUyeg 1477
llm_datasets/datasets/pl/pl_nkjp.py sha256=QZPPnxWM5tbObN30w1EpjGrGD03IZ35Z0TtHkFDi_OE 2948
llm_datasets/datasets/pl/pl_parliamentary_corpus.py sha256=fny4YxsQrJvucwXDZNmtZ9FgNsawA-CA3UBaa4dALkM 2567
llm_datasets/datasets/pt/brwac.py sha256=lV-53GFe0SR2LJ6stk8lqISiiiMnTtK3RzVDydvV_pA 2321
llm_datasets/datasets/pt/parlamento_pt.py sha256=hlwIG3fvy7cSjIxVq_lrvINUZERfkNlaZLMd0dM7qDA 1140
llm_datasets/datasets/ro/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/ro/marcell_legislative_subcorpus_v2.py sha256=JdWnHKjivCZtqSP8DMcOYNWD__5Y2_RGzleCClgeVhg 2553
llm_datasets/datasets/sk/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/sk/sk_court_decisions.py sha256=zDccg1Yd2WsAoISiizahuiMSkErDuSy_8R7dDiY7Dsw 3376
llm_datasets/datasets/sk/sk_laws.py sha256=5gtN0UoMAUCllWy2FtVsiQs-BEHSOheB2t2rksYApmg 3181
llm_datasets/datasets/sl/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/sl/academic_slovene_kas.py sha256=DayTpxiM5fPKbB3WZiLN7vFA2oCyjZ2gCxiteSxH7_0 2574
llm_datasets/datasets/sl/cc_gigafida.py sha256=iVGhe_0403UIiQRTLFbaTVPFQZtHT5t4Jl9LchjP52s 1825
llm_datasets/datasets/sl/slwac_web.py sha256=Gym_uWPX_Z5DUnfOvzTWAeICcV0kcVbAmidpF4KX2X4 2938
llm_datasets/datasets/sr/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/sr/srpkor.py sha256=QSfFEirvzE1-Ajrs75Ic_5cHNZLmtQO1ksSZswXjzjE 1561
llm_datasets/datasets/sv/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/sv/sv_gigaword.py sha256=pbuc--_JoKxVoviy2bHiCzGjWarXoGeRHmIySnoemcU 3549
llm_datasets/datasets/uk/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/datasets/uk/uk_laws.py sha256=M6mFoal9eissB6dwZ9qn41Gy-GiMIYj3vEhH6gRCMVo 1447
llm_datasets/io/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
llm_datasets/io/conllu_file.py sha256=A78qRSty_9N76aqAsAmBA1t1tKc1uZ_xtM4bECQuUwM 2968
llm_datasets/io/parquet.py sha256=U5N4OwpH2sMjn7rLGpupAQQ2Pv3-9YjBxhue-LIfOe0 9126
llm_datasets/io/prevert_file.py sha256=Q4sUVgp6kuuI6iJ_7UEQ4BFEr0J3SxIwJToHj0uQmUQ 589
llm_datasets/utils/__init__.py sha256=vls-DlCzirxpvMsWOEsDiz-pVbeHlJhUH8QEQlCERZw 4833
llm_datasets/utils/config.py sha256=0GxY4zeTzFo_r053BQodm38GVSsjkw6H4TCkpc_JO50 5543
llm_datasets/utils/dataframe.py sha256=CZrqUaYJTl11MdFoqLJCzNtTncqJYM1tCKKcR6_UMLw 7422
llm_datasets/utils/dataset_generator.py sha256=pyuG_RUiiF-IHX8A8woO0LdDftfCiGiwu9N3x2xKoJ0 16985
llm_datasets/utils/flatmap.py sha256=LY-ieJ1-dWKVIPHdnSVcm3JIhWy6HA7_3BhFQoMswcM 1141
llm_datasets/utils/languages.py sha256=JlKf_86NDqhY8uboI0HiMDd02-Hkllz9g2-0Oq77Yuk 4660
llm_datasets/utils/settings.py sha256=EUgM_47My5ODd7FQVqr6XXWaVTmBvOlkB7_EDTatdc0 2778
llm_datasets/utils/shuffle_big_file.py sha256=BHbK6Tt9akV_ZBGYGq72AdNCKR-4t662enGq1EwtkcA 6232
llm_datasets/utils/systems.py sha256=8NTHb8jbCMAmsc7oUensXVu1QJEQeNI2O91uCAX0__g 1210
llm_datasets/utils/wikimedia.py sha256=ZYpeV-oxz-KpPbVI02raW5whH2lAoDwUzY1NjT52lrw 28082
llm_datasets/utils/docs/__init__.py sha256=djibdtGplwcD3YGJdUxo1VsXD0NwEom0A1Z-CiZMLko 91
llm_datasets/utils/docs/plots.py sha256=A5wiqDDWdRZkMke4RHcX0yBZkxnjBO11vPRCmBTKFCA 7086
llm_datasets/utils/docs/tables.py sha256=h4YVVHVKSUKLbFzalItXyZW9E0Cp7WqWZjMaQgVnSTA 3070
llm_datasets/viewer/app.py sha256=mlcAViqkqi4Z39R18L3zBZfo1FvOHO5me-mIv8TxQ2g 14167
llm_datasets/viewer/ngrok-app.py sha256=idzbFFAY1_nzDv6WGw8-c9d2qmZAB6YvZN4qD9rsepU 1457
llm_datasets/viewer/viewer_utils.py sha256=q5a387suqaQLG0BwHIwn4H5PRz4Yk_yFR7viXzgYyP8 626
llm_datasets-0.0.3.dist-info/LICENSE sha256=eOJRZOVy_zq0mnix283vQuCf7a5-9qoM8rW3YYoMGg4 11369
llm_datasets-0.0.3.dist-info/METADATA sha256=yDRg987qs6X4Yo7EJEJGeDAIZM-BEypsc_BTnbmF1HY 11302
llm_datasets-0.0.3.dist-info/WHEEL sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ 92
llm_datasets-0.0.3.dist-info/entry_points.txt sha256=Mq-eECligKfe5G38i2D8YLTBHoBiouBkLMg8gWnY54A 60
llm_datasets-0.0.3.dist-info/top_level.txt sha256=tHwIOEL_A8kIEIEhpsk-lBAhvGt0XVC7VVwVDg0fNn4 13
llm_datasets-0.0.3.dist-info/RECORD

top_level.txt

llm_datasets

entry_points.txt

llm-datasets = llm_datasets.__main__:main