invisible-rabbit
View on PyPI — Reverse Dependencies (0)
0.5.0 | invisible_rabbit-0.5.0-py3-none-any.whl |
Wheel Details
Project: | invisible-rabbit |
Version: | 0.5.0 |
Filename: | invisible_rabbit-0.5.0-py3-none-any.whl |
Download: | [link] |
Size: | 337560 |
MD5: | 70faedb35fc2d3d6c51c6d8b43124c34 |
SHA256: | 32e6f913b882fec613c3dd96c6c0c54b0828b81c637bba8c1349aede60d30989 |
Uploaded: | 2024-10-29 18:39:52 +0000 |
dist-info
METADATA · WHEEL · RECORD · top_level.txt · entry_points.txt
METADATA
WHEEL
Wheel-Version: | 1.0 |
Generator: | setuptools (75.3.0) |
Root-Is-Purelib: | true |
Tag: | py3-none-any |
RECORD
Path | Digest | Size |
---|---|---|
nemo_curator/__init__.py | sha256=4NZHY4ji7hF4So0yI3YgZv1hPIsrr5DjtvdW2xr6GRw | 1726 |
nemo_curator/_compat.py | sha256=PmCRFwbZa9ICvHE7ccxsbkx6kNOzsdu0K0y4sM_3k6o | 1102 |
nemo_curator/log.py | sha256=RoRApb4jGYC2TaIWZy8SnYi4v7CO6r31FYBCO15jCss | 2937 |
nemo_curator/sample_dataframe.py | sha256=553_PeN7UlAjbcIZNDeZxmFmUJgqDB_0O86ML4SwUWA | 2835 |
nemo_curator/classifiers/__init__.py | sha256=rXwCkT6jqQzHnrMbFkCxp5iY6XAHUw4EwusBatTmczY | 935 |
nemo_curator/classifiers/aegis.py | sha256=NuP3zB3omaLCW6Rjd6okpjapYSeXiN_sRa-IDSLb8zo | 11224 |
nemo_curator/classifiers/base.py | sha256=wHNF9vgw09KCLN2EdfWBXiNnInyg1ELCp99-LjrJyDw | 5401 |
nemo_curator/classifiers/domain.py | sha256=qZx0sE1juFbmoapM2taWf1eJWzliW5aVaWCzY7Nki1U | 4774 |
nemo_curator/classifiers/fineweb_edu.py | sha256=suWtYM83Rn2WsRCaD96tyYwKwTC5teuSTtWXwS9vxgk | 5574 |
nemo_curator/classifiers/quality.py | sha256=e2VPXDao-kbk_EPGppJ5KGZXFywmj1Adj98S5E2SrZE | 4780 |
nemo_curator/datasets/__init__.py | sha256=mGlpzirGyfgdKLwy4-8hh-Wk1z7qveVDRuDqYsVYZTY | 901 |
nemo_curator/datasets/doc_dataset.py | sha256=KZ5XzeBGc99wFlWhCftUbf6CK7l5EkfqGCKSawuCiS8 | 7545 |
nemo_curator/datasets/image_text_pair_dataset.py | sha256=gVItCnI67StRSEh7BE76cfSMHTKUjDzllcqpbiKqVOY | 8247 |
nemo_curator/distributed_data_classification/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/distributed_data_classification/arg_utils.py | sha256=74BRZ3vbdXUBa7LV61Hgkoo7sG7-CSrgWypH7mpDQ6Y | 4754 |
nemo_curator/distributed_data_classification/domain_classifier_inference.py | sha256=ULa_G-JdCpz9xBU4y1cGepxQuHTWX4UuHozMC9Neo20 | 8478 |
nemo_curator/distributed_data_classification/generate_statistics.py | sha256=CNOIjtqBN_Zj71HWJ_ECh3i2ejqDKWMtSqLNNixIf70 | 2691 |
nemo_curator/distributed_data_classification/pytorch_utils.py | sha256=Ja3rpvDCzSMfwHBlISZWRM4CMSgbvRpGqA5LWdTf2gU | 3678 |
nemo_curator/distributed_data_classification/quality_classifier_inference.py | sha256=CiUzmKfJzaDWPnueX7zsqnLz6ai44exuq5T8cwdzZ1Y | 10103 |
nemo_curator/distributed_data_classification/quality_classifier_multiple_models_inference.py | sha256=qSeuowrLjiql0NgJkc3t-HmdSd1nLAhsYWbTqzLFQ9I | 5153 |
nemo_curator/distributed_data_classification/verify_results.py | sha256=hLh-p67CcZXM_qxb6cq4ZDxrajKCXdwG95rUCUPQ-4Q | 6065 |
nemo_curator/download/__init__.py | sha256=9lBlOthqw-Qd1P93Hn1VmjAfM57PStMfQ8teCf-V-Sk | 1887 |
nemo_curator/download/arxiv.py | sha256=12DUsllmKr2DIcurKRJYHNNzewjolrU3PzT2UjRe3ZM | 15523 |
nemo_curator/download/commoncrawl.py | sha256=r6HIH0XQ9IZssSGoKXCTV9Llef0T6gFL_H3y-5WGDtc | 15562 |
nemo_curator/download/doc_builder.py | sha256=WRYD-vq9lDwhuOyv1o17oqcYkMafFfhhKTBs4HdrkDs | 7372 |
nemo_curator/download/thai_stopwords.py | sha256=oIf0Y2NK9Ig1oSsk6rUcVb4ZHgYPWrtyT_PHf5akIws | 2715 |
nemo_curator/download/wikipedia.py | sha256=nVSbpmW7QhNg9eGJAA4BOAHLptJuhctllEFaRDfZWLg | 31193 |
nemo_curator/filters/__init__.py | sha256=2oFTNdkjy5pJboO23HpqhiDdjf2gTHcq-jIsbmeDZM4 | 2525 |
nemo_curator/filters/base.py | sha256=OVqOWm08cxhlSMTEPPNQX7opnnpy0EWwI3AgXMbMLyI | 8048 |
nemo_curator/filters/classifier_filter.py | sha256=La51m4_HfUTW4vtXku6AtirAzAuqdsxpkedsqpXGkWE | 3436 |
nemo_curator/filters/code.py | sha256=Gzptvoq3f2_wWC37_ZCYo4LAwuzaGX7_fNx-BSY_Gl0 | 10656 |
nemo_curator/filters/doc_filter.py | sha256=bjp7vy_B1FA5dUkslFg5u7aOoH0ifBNdIhFWOdbINKU | 4174 |
nemo_curator/filters/heuristic_filter.py | sha256=7XnaQBbH5hmf73PP4reuA2RdyEF5XpglkPsxj3ETTSA | 20001 |
nemo_curator/image/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/image/classifiers/__init__.py | sha256=hzZMZAKaYrifBfy067gYW0zym2onTKAA4BAhS1EFKz8 | 792 |
nemo_curator/image/classifiers/aesthetic.py | sha256=xk1ffms87lLWiOh3kLV2nKrfVFkImaRwYZxllj39fpk | 3457 |
nemo_curator/image/classifiers/base.py | sha256=DOUXhXh64tLrAwlHffBUmpXWvILlWUnVBnabcDxQX_0 | 3790 |
nemo_curator/image/classifiers/nsfw.py | sha256=0kQO9y-kT1KEMCUMRQkhRpbT-TR06FKfaRjh7WE0UKQ | 3855 |
nemo_curator/image/embedders/__init__.py | sha256=PJJYO1NFEqffgd2lMU4I84tJYXp8Px-flCP7-aQvnCw | 728 |
nemo_curator/image/embedders/base.py | sha256=3FHe7euyGgPg0-_-nrrQ_JE2odMAvdFdJ9DTrDjmXwk | 4904 |
nemo_curator/image/embedders/open_clip.py | sha256=5yAzaQZNCQmnO05g-_9NRE0EiTYPXYNk-0m6Iq54PJ8 | 4544 |
nemo_curator/image/embedders/timm.py | sha256=1qFqzR7MG7vYNSOQ427yN64d-CsV_87aSHoB7WOUTtM | 5259 |
nemo_curator/modifiers/__init__.py | sha256=S_fHYgbQOezDHVA65kV-AR2HMzPn9-G482Rc-_vqNN0 | 976 |
nemo_curator/modifiers/base.py | sha256=AuoPtDH_ps94jtSilp7YlC7l0ZisW5W5lITOZP_CPAo | 1847 |
nemo_curator/modifiers/c4.py | sha256=U25SZoeVOcbMRJ4kRsb8JsI8S6IQv1bBt--Q3MfdqLk | 3273 |
nemo_curator/modifiers/doc_modifier.py | sha256=i6lDpiCybvQP_u6ea2-dC1X8Ym2CTRImPmtQBlxA5HU | 936 |
nemo_curator/modifiers/fasttext.py | sha256=aUg7jTcS1OvG_BKG1PUexzc0JcxEF4jDgeGoy0yx8XI | 938 |
nemo_curator/modifiers/pii_modifier.py | sha256=zZbsc3BkZjSuw-x3VMYUci7CSmdQ2YPjIC_z-fMWeck | 3728 |
nemo_curator/modifiers/unicode_reformatter.py | sha256=nz-vfzMqcOHpoCY6KSkwQ1nLmhA1UxOBGb0B7wgeYHE | 846 |
nemo_curator/modules/__init__.py | sha256=fpR-OCJP7cHBBr9RLK8dVeF9DC638r6OfjNHouVwXAA | 2684 |
nemo_curator/modules/add_id.py | sha256=sMUyt3c_puGsxqpu10QeNOXhXHprg1oDxOvDJkXY-lg | 3377 |
nemo_curator/modules/base.py | sha256=bKwItHoKT_FEleSdtN6fKccacoz3rc8NvhXzDBzHz0Y | 1907 |
nemo_curator/modules/config.py | sha256=f_QwO0dZsAV7578FyWDXonqUhY15JVHWS9soNfZWTIg | 7056 |
nemo_curator/modules/dataset_ops.py | sha256=IMUPEE2kCQZduqPyozQwQvLSqA93JULjoOVC7XkMAro | 7170 |
nemo_curator/modules/distributed_data_classifier.py | sha256=FFoniNNy3reqCq4LhobAtVDbHJuTOksPb_TVSCP8Sqs | 11799 |
nemo_curator/modules/exact_dedup.py | sha256=JpDbBsdlvgZ09iPfwjpUFhhXqNeLVw4K0uSAWZJYdLM | 6406 |
nemo_curator/modules/filter.py | sha256=GqmnO-4yG0KHZq0ho11KzCR4oY2T-z5wDUDyIUBqzms | 7373 |
nemo_curator/modules/fuzzy_dedup.py | sha256=OFeIJO0RynYg8an9_aqAldjGm8n89GkBL4RXJcs1jfA | 63056 |
nemo_curator/modules/meta.py | sha256=9q-4cd1pZQm4T5D-U_oydoBtavJDxR_ssAZy32KS0pI | 825 |
nemo_curator/modules/modify.py | sha256=-vuGZy_hhIN963Eq3aIhhCVbHOYOV8nle-Ik8VGS8CQ | 1406 |
nemo_curator/modules/semantic_dedup.py | sha256=nH-8jPHxau6EZJg8QGYeO0ZmvyLgLvGLxo44fBOXTYA | 23033 |
nemo_curator/modules/task.py | sha256=4xpnykbH9qQiyODjwVMlR8GlT6xCEAgk9tWBtPv3OCo | 18766 |
nemo_curator/modules/to_backend.py | sha256=JRrK8ItVPRzbUcbw1_u67raIUcnikBcDXfdRJ96jIzE | 1002 |
nemo_curator/nemo_run/__init__.py | sha256=SsipNxmMuUKT-P22M7z3i2K10gSybbAuHklWQgxl9Zs | 675 |
nemo_curator/nemo_run/slurm.py | sha256=WySsj9PYVhnMJZAt_4kOqPLyRsnr0JYnizSAL6d4oms | 5298 |
nemo_curator/nemo_sdk/__init__.py | sha256=SsipNxmMuUKT-P22M7z3i2K10gSybbAuHklWQgxl9Zs | 675 |
nemo_curator/nemo_sdk/slurm.py | sha256=44bf16BTXoFBf6RJhu_n402DVI8o09wlakZ522U9wtU | 5298 |
nemo_curator/pii/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/pii/algorithm.py | sha256=vQyyewAniVQ0xpCPm61IdjQwfZWm9MLcs7etuT96ncI | 9843 |
nemo_curator/pii/constants.py | sha256=FhrU0UmUUHE6LIxDNAP4WQuY3Dl48d0bdBwr5sM-TVg | 369 |
nemo_curator/pii/custom_batch_analyzer_engine.py | sha256=bc5EmJ5BZs07C6Y2Z-rlX6FagC1R29hjxDyvisrbsFY | 6668 |
nemo_curator/pii/custom_nlp_engine.py | sha256=1ZPR8gxCskO7pajgSFgeOz_eAERJjNV12V1Oqqz9rJ8 | 2686 |
nemo_curator/pii/recognizers/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/pii/recognizers/address_recognizer.py | sha256=Pc_lSuT-J5asHQQ9So5GW-2yoSDoIqoQzSsjtkyg9uY | 1828 |
nemo_curator/scripts/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/scripts/add_id.py | sha256=8iLkTCEHyc3MXdA2082UkVUwKjCZdGT4CpkwnAiU6zE | 4018 |
nemo_curator/scripts/aegis_classifier_inference.py | sha256=a-Jy4CGFHYD8U2UG8BkHSpXrzCuET8fRY_Hczw-vOQw | 4218 |
nemo_curator/scripts/blend_datasets.py | sha256=PYdps-cilBzHPkIMV5HYZCqVpJ2NYwEhoiOttCkV89w | 3715 |
nemo_curator/scripts/domain_classifier_inference.py | sha256=dneASqLn-ghTo7D2tfWueZTbItgkinyv5dqhIQASfW0 | 4073 |
nemo_curator/scripts/download_and_extract.py | sha256=8vCS5wL9cvpYG_jzucAEBUL0XY77ru7qG64WvgBkDD0 | 5814 |
nemo_curator/scripts/filter_documents.py | sha256=rWBjwmUlkw95CULJ6DdjE0Vp0-vUFVMMZSsAwSvHzjA | 11057 |
nemo_curator/scripts/find_exact_duplicates.py | sha256=EN5Hjs35RuH48iN7VQlLbgFNQd1IH8vwuouE43Fv0xs | 3962 |
nemo_curator/scripts/find_matching_ngrams.py | sha256=jPpnh_vHFeG7sPXwUNw2WL2oyBugaKJ7wBzuZZaxTi8 | 3335 |
nemo_curator/scripts/find_pii_and_deidentify.py | sha256=YzA7kog983oJanNaKWeERcDlPAaZO039QY2VzCU0PUM | 5536 |
nemo_curator/scripts/get_common_crawl_urls.py | sha256=jZVTY2GJXipO-WqiTYP830AzuecNtjvRnsAEKesZV0g | 3479 |
nemo_curator/scripts/get_wikipedia_urls.py | sha256=VcBW9tcmVxRHx7ijWFPf_2kmWF6LLkdjZFhLSHA_zwI | 1862 |
nemo_curator/scripts/make_data_shards.py | sha256=QFykRSrrur3KjziOnxkbDX9pppJ06qyi0riLQiqKFmw | 2589 |
nemo_curator/scripts/prepare_fasttext_training_data.py | sha256=-H7gHQnojdRc1EdxCt2NKJRn2giNi533pMujOli8lhA | 3521 |
nemo_curator/scripts/prepare_task_data.py | sha256=9RzN261e1x4b3L7tORJF8maAJh2ZW4hXJJjQkZNQMjs | 3034 |
nemo_curator/scripts/quality_classifier_inference.py | sha256=HxhoPlAUJGa64a7-8KMKD15eMd44WUhDOw5cp1EtKgA | 3887 |
nemo_curator/scripts/remove_matching_ngrams.py | sha256=GEXccxDhUxy2vvjkqO3IF5Q8wW99g4Kn9TqyzKJKwtE | 5306 |
nemo_curator/scripts/separate_by_metadata.py | sha256=1e9NAYOkgidEXoLXb5u7DOyrS2ayExDEHaHtADbuzKc | 3732 |
nemo_curator/scripts/text_cleaning.py | sha256=eha9Gztu3wx3E0QJgRB_2XMoaFbdu8sOJOuQOLeJpDQ | 2980 |
nemo_curator/scripts/train_fasttext.py | sha256=bTPRQVxx2xQ37NH_wci6Kw_JtZvIX5JrZemV7Qbqzmg | 6420 |
nemo_curator/scripts/verify_classification_results.py | sha256=dQ1PfqDnOuh_Hrb8XXqkiLL7KWJH3ghVU3PtiWaii-I | 6615 |
nemo_curator/scripts/classifiers/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/scripts/classifiers/aegis_classifier_inference.py | sha256=4PSbbrTjykhMU2_umVv39JHZJV-Mlzx1Vsf5j0OyRes | 4263 |
nemo_curator/scripts/classifiers/domain_classifier_inference.py | sha256=1Zn2XdqnXHbZGWiJzQEtXna-1vRhoeBZC7FPutugk-U | 3590 |
nemo_curator/scripts/classifiers/fineweb_edu_classifier_inference.py | sha256=2OEAygSnOVG6klCLisueVjNMzwfcAEBt33lY00-J37E | 3610 |
nemo_curator/scripts/classifiers/quality_classifier_inference.py | sha256=IfnXekVPT-zHkkYAvUoKm4pHWY-tEhPP9OdHyy54bVQ | 3592 |
nemo_curator/scripts/fuzzy_deduplication/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/scripts/fuzzy_deduplication/buckets_to_edges.py | sha256=TW1A82Q-ntwTnRGYM4XcZINUbCeji2FWfLicOAkhVW4 | 2833 |
nemo_curator/scripts/fuzzy_deduplication/compute_minhashes.py | sha256=wCPXHP_iR_SQyJLjN0DsL1s4LtqfvqXoY3lfx15LbVE | 5382 |
nemo_curator/scripts/fuzzy_deduplication/connected_components.py | sha256=-EFL3HRP7bJZtRReNarcQvC3c4-zovS_sBBLCKTgV3A | 2733 |
nemo_curator/scripts/fuzzy_deduplication/jaccard_compute.py | sha256=9nhJ3JBrZQ96mIlvYlS_TNYLz2kTIam98hh3ziBpZuQ | 2779 |
nemo_curator/scripts/fuzzy_deduplication/jaccard_shuffle.py | sha256=8H2ydU_2v9JlfYnK9PXdE2PtCyRVqwNg5PwrqryzvOU | 4122 |
nemo_curator/scripts/fuzzy_deduplication/map_buckets.py | sha256=VAaRiGTf79l4h1K5P0AjrniGz2YJRprt7NICZe3cLIk | 5753 |
nemo_curator/scripts/fuzzy_deduplication/minhash_lsh.py | sha256=65ZJPXtprkM6nZ3rk4KVFF_FSHUMbHGbiAc4gdhghSA | 3871 |
nemo_curator/scripts/semdedup/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/scripts/semdedup/clustering.py | sha256=Lo3puipI8XBbZy2AYtqsJD5QrCsw8udUApK9Dby-yEc | 3857 |
nemo_curator/scripts/semdedup/compute_embeddings.py | sha256=kPz-7TmA-02sTe5UJkWgo7TMmP9BAZlCoBrEuk5536k | 5084 |
nemo_curator/scripts/semdedup/extract_dedup_data.py | sha256=WKR8xnd036b8qPys07UI-8NRIBBmQV0nu3jp3Gn9XdY | 3288 |
nemo_curator/services/__init__.py | sha256=B3D0AylSmuOOruKFvG_vqV78f9OuR52mDGTWa4npDoQ | 973 |
nemo_curator/services/conversation_formatter.py | sha256=JRMOpMCMrCz2nN4BwHvcHigi_xJTrw6lo6MTlzGN2kQ | 1022 |
nemo_curator/services/model_client.py | sha256=1OEgNbF9vgeso9d64G4cKiGL9ZfxU5Csue0hOOHwi_0 | 2994 |
nemo_curator/services/nemo_client.py | sha256=uVN8xhRVsKr7iD20xJ8j-ZfHWzlsOUSnIkvhf1xF0v4 | 3626 |
nemo_curator/services/openai_client.py | sha256=z4Kqvs1ItyvhnbPJJB-Idpvys6iNUyr5ai_IayjwQHw | 6035 |
nemo_curator/synthetic/__init__.py | sha256=EskbOiR6d4VCMNgR50IIo-mgjc4SDnaQ2ru-4x7iw3E | 3072 |
nemo_curator/synthetic/async_nemotron.py | sha256=Meg3F9-gTt3q0_n59dQoVMIJVsbhE2xdTDSe7sflroo | 80632 |
nemo_curator/synthetic/conversation_formatter.py | sha256=JRMOpMCMrCz2nN4BwHvcHigi_xJTrw6lo6MTlzGN2kQ | 1022 |
nemo_curator/synthetic/error.py | sha256=EWGJNnhanzilxsUrlMKOgcv2fJ_9YjunOFIpLpteOzQ | 803 |
nemo_curator/synthetic/mixtral.py | sha256=CUKR-bmD3qYayxtK08987A7AepAa5jbzvGfoltxzSZ4 | 1319 |
nemo_curator/synthetic/nemotron.py | sha256=m1S03olbsjYmNcv9XF5P2xUF5AtF_wk0IshAGtIgMzk | 71704 |
nemo_curator/synthetic/no_format.py | sha256=TjJoa04f_VQTczgAYf67p5XnGB0oKATXFRQ6SXSYTAk | 1194 |
nemo_curator/synthetic/prompts.py | sha256=DjsQRoi4iqQX9FNeWFEVoKC0TAKGFurCqoEt-Gxgwyg | 8198 |
nemo_curator/synthetic/ultrachat.py | sha256=kYDtDPAhYhAprirbc8yyRkaX873wtGrRttdMjTOX33o | 2317 |
nemo_curator/tasks/__init__.py | sha256=IfaRyarhNVd7icbFyiT_fBnWoY38iCcIqZoFR019Wis | 1499 |
nemo_curator/tasks/downstream_task.py | sha256=krVCvrbuZy9ToNyImYXpxUl03hBCnPVlWcrOXWvCwPY | 1968 |
nemo_curator/tasks/metrics.py | sha256=MgBEr9mC9KEeDRfmW32_k4a06f6ljWveyC9LBLtAG0Y | 18930 |
nemo_curator/utils/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/utils/aegis_utils.py | sha256=LEwVKVFFP7pzBciaWaAL5O1hzZ_yE4AI5ik7-DD70so | 10140 |
nemo_curator/utils/config_utils.py | sha256=0_Uy16dEXqqTeqiQsBCRzD8E_wzMYOYvz0_jkC-ZGhU | 3598 |
nemo_curator/utils/constants.py | sha256=Szr4RTF8hFFkTqz5dhh_IbTReZfBXFLlhyokggQ_bvI | 3296 |
nemo_curator/utils/cudf_utils.py | sha256=tBF12by81jGwAx-lBEGYvGniJdYLIMDIN1GtROfkh90 | 1543 |
nemo_curator/utils/decorators.py | sha256=t0gGx4HhyEJntCk5XAAaoePmoahSyTNjhqtEpGeeHrY | 864 |
nemo_curator/utils/distributed_utils.py | sha256=Fk1jMgCuZhMWfX8y0JWMKoAjUjg-PuXsRPVQtC0bpwE | 22941 |
nemo_curator/utils/download_utils.py | sha256=i97IsKFyOCWrZa6JuZh84ugabIO9LHkKxIp7fzAR9ic | 7458 |
nemo_curator/utils/file_utils.py | sha256=W_8--WVP5Gr8vNTKfWkw_P3nKgDq30_ZOEg8qWgiqcY | 8660 |
nemo_curator/utils/gpu_utils.py | sha256=3hZWSm_aQdxyIyOjf2vsuHo3QT9qzKNh63ZPzvuTohE | 1113 |
nemo_curator/utils/import_utils.py | sha256=Zj9e1-bFTfCLk1B_GmnvOEvLt3mpqcCaIkMgbZorWS8 | 13261 |
nemo_curator/utils/module_utils.py | sha256=aaTOspYbMLpGby-gd-wcVKhVv_eBbtYpPd4mjMiRgSM | 779 |
nemo_curator/utils/script_utils.py | sha256=iT4Hypn179iWDApeCwa5Uf_ynfIuFJmQLz-0p0IyO-0 | 19792 |
nemo_curator/utils/semdedup_utils.py | sha256=Iz05or3tCULitP36NW2Fjr365DI3UoUfh5R-kP8nNSg | 15506 |
nemo_curator/utils/text_utils.py | sha256=OMFateEXEDtlsIrZov7J_6CXYI00M0AAfXs_m-wxiM0 | 5920 |
nemo_curator/utils/fuzzy_dedup_utils/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/utils/fuzzy_dedup_utils/id_mapping.py | sha256=05MwMpEDBycgB9rESP5yIZSbZDqAW4tb3t2zJLKSEtM | 1862 |
nemo_curator/utils/fuzzy_dedup_utils/io_utils.py | sha256=SZddQUZWUI47b_oRNfhy3myBH1zEHitJ5JaU6qetNbc | 6292 |
nemo_curator/utils/fuzzy_dedup_utils/merge_utils.py | sha256=PQpt26iyJQhBiBdjA1n3fVMM-HYxJrU1YeTGucInxUA | 7800 |
nemo_curator/utils/fuzzy_dedup_utils/output_map_utils.py | sha256=XzhhT-dP885ZQW5Y37jCvMz4D0DKS_Lkg7V3UqQaoEg | 2588 |
nemo_curator/utils/fuzzy_dedup_utils/shuffle_utils.py | sha256=tDUPP5l6VBId0ubmGyLZ_oEldOazYoFGFM3rQFcvbes | 5223 |
nemo_curator/utils/image/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/utils/image/transforms.py | sha256=piLOcV_iG48lCnfJkBstqqUSlDico0hNaxiMzJ9YeHQ | 3561 |
tests/__init__.py | sha256=Gl7X71vLknbUhnkfFuOWJvs_by-CkQjqERM3FQp24Uo | 1253 |
tests/test_add_id.py | sha256=54wB9j5C79YGEuoMUBNW4e4gHHAwXNLJr4fET1-IpDk | 4987 |
tests/test_blend_datasets.py | sha256=X4Xu9jTroU0dbu4WFkvHeVqePxj_ncASJZfLpIulyWA | 3878 |
tests/test_config.py | sha256=5sx7G-ATi0JJ35UeLWWlOLIFHibuvpb3trpMC40GV5o | 2716 |
tests/test_dataset.py | sha256=eD01HC-wwoCqLP4zGNn1N__nv-NRqIAd7ey9HdBGD0I | 858 |
tests/test_download.py | sha256=j4ZgG_sa0Dq1UjvWmFgw8fL0wGh0ZIukB99je3cGCkU | 6538 |
tests/test_exact_dedup.py | sha256=hBLdCU7qBUxaePygjtT4QhsxK5Pcyf8x5b6mIBr3U_Y | 1900 |
tests/test_filters.py | sha256=c-dXkxoVgrEXo5DobbKwl5Z83gTLM801OMv6flznS1w | 30973 |
tests/test_fuzzy_dedup.py | sha256=uKWz8DrBGqu3WdXVj__qaWX8NHAoglu48spcNHdwXYc | 18355 |
tests/test_io.py | sha256=szxF9ODvj390Hc800CQpW1MUhOMV7sHU328vog8a5W0 | 7819 |
tests/test_pii_accuracy.py | sha256=vsjKth8M73To0UkMJvhR0WpE6jsZLMZH4zsyXrHa9y0 | 6376 |
tests/test_semdedup.py | sha256=avP_M3mlDE_8cXUpZbhzXmr2VRnuaQWN86HLZsshfdM | 2796 |
tests/test_separate_by_metadata.py | sha256=rk3lPqWc706pe3GNDd_5Sx5g3gTNi3zLHj6qylYjZIY | 2574 |
tests/test_seperate_by_metadata.py | sha256=FGXzOXXDys7zXzyXkY2JuwMlV0Jp8fs26gOzi6IcfuA | 2566 |
tests/test_shuffle.py | sha256=JaX3Psby4bEIe0-BNOCDvnrHP9rnXk2EbC-MY5x6hQo | 7389 |
tests/test_task_decontamination.py | sha256=RFj0x7rkl_pQEGf3RI4KVB4T4gADjX4Xf3FOBHJnCyA | 12881 |
tests/test_unicode_reformatter.py | sha256=iALTzl-G1ggy9ZjiDWqhw9ub1x3DqZCQro4S1UVR0YA | 2082 |
invisible_rabbit-0.5.0.dist-info/LICENSE | sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ | 11357 |
invisible_rabbit-0.5.0.dist-info/METADATA | sha256=sdigFnRPKSXErIo7OZ6llFd9ZIIghMA9ycSbHghwFBs | 16628 |
invisible_rabbit-0.5.0.dist-info/WHEEL | sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc | 91 |
invisible_rabbit-0.5.0.dist-info/entry_points.txt | sha256=bO9DqbxdYCfbcgzT3vs2uZL8jahbKgV4wCl2QCgxngE | 2538 |
invisible_rabbit-0.5.0.dist-info/top_level.txt | sha256=NZpMgId9Qc8gKXBh5ITLq99W4VsNI3MN1cG7f0hm_n0 | 19 |
invisible_rabbit-0.5.0.dist-info/RECORD | — | — |
top_level.txt
nemo_curator
tests
entry_points.txt
add_id = nemo_curator.scripts.add_id:console_script
aegis_classifier_inference = nemo_curator.scripts.classifiers.aegis_classifier_inference:console_script
blend_datasets = nemo_curator.scripts.blend_datasets:console_script
deidentify = nemo_curator.scripts.find_pii_and_deidentify:console_script
domain_classifier_inference = nemo_curator.scripts.classifiers.domain_classifier_inference:console_script
download_and_extract = nemo_curator.scripts.download_and_extract:console_script
filter_documents = nemo_curator.scripts.filter_documents:console_script
find_matching_ngrams = nemo_curator.scripts.find_matching_ngrams:console_script
get_common_crawl_urls = nemo_curator.scripts.get_common_crawl_urls:console_script
get_metadata_from_corpus = nemo_curator.get_metadata_from_corpus:console_script
get_wikipedia_urls = nemo_curator.scripts.get_wikipedia_urls:console_script
gpu_compute_minhashes = nemo_curator.scripts.fuzzy_deduplication.compute_minhashes:console_script
gpu_connected_component = nemo_curator.scripts.fuzzy_deduplication.connected_components:console_script
gpu_exact_dups = nemo_curator.scripts.find_exact_duplicates:console_script
jaccard_compute = nemo_curator.scripts.fuzzy_deduplication.jaccard_compute:console_script
jaccard_map_buckets = nemo_curator.scripts.fuzzy_deduplication.map_buckets:console_script
jaccard_shuffle = nemo_curator.scripts.fuzzy_deduplication.jaccard_shuffle:console_script
make_data_shards = nemo_curator.scripts.make_data_shards:console_script
minhash_buckets = nemo_curator.scripts.fuzzy_deduplication.minhash_lsh:console_script
prepare_fasttext_training_data = nemo_curator.scripts.prepare_fasttext_training_data:console_script
prepare_task_data = nemo_curator.scripts.prepare_task_data:console_script
quality_classifier_inference = nemo_curator.scripts.classifiers.quality_classifier_inference:console_script
remove_matching_ngrams = nemo_curator.scripts.remove_matching_ngrams:console_script
semdedup_clustering = nemo_curator.scripts.semdedup.clustering:console_script
semdedup_extract_dedup_ids = nemo_curator.scripts.semdedup.extract_dedup_data:console_script
semdedup_extract_embeddings = nemo_curator.scripts.semdedup.compute_embeddings:console_script
separate_by_metadata = nemo_curator.scripts.separate_by_metadata:console_script
text_cleaning = nemo_curator.scripts.text_cleaning:console_script
train_fasttext = nemo_curator.scripts.train_fasttext:console_script
verify_classification_results = nemo_curator.scripts.verify_classification_results:console_script