invisible-unicorn
View on PyPI — Reverse Dependencies (0)
0.4.0 | invisible_unicorn-0.4.0-py3-none-any.whl |
Wheel Details
Project: | invisible-unicorn |
Version: | 0.4.0 |
Filename: | invisible_unicorn-0.4.0-py3-none-any.whl |
Download: | [link] |
Size: | 259491 |
MD5: | 14515654d90689be05255fa327074b62 |
SHA256: | 9c3d3df4b77b987da29078f4419a9fea03bcefdb4eb20fd0e666666f76f6ee3e |
Uploaded: | 2024-10-03 19:05:10 +0000 |
dist-info
METADATA · WHEEL · RECORD · top_level.txt · entry_points.txt
METADATA
WHEEL
Wheel-Version: | 1.0 |
Generator: | setuptools (75.1.0) |
Root-Is-Purelib: | true |
Tag: | py3-none-any |
RECORD
Path | Digest | Size |
---|---|---|
nemo_curator/__init__.py | sha256=4NZHY4ji7hF4So0yI3YgZv1hPIsrr5DjtvdW2xr6GRw | 1726 |
nemo_curator/_compat.py | sha256=KD0x4kI45L0vTzlt2cV-kmRk0zA-jDQamjwvWurVLJg | 967 |
nemo_curator/log.py | sha256=RoRApb4jGYC2TaIWZy8SnYi4v7CO6r31FYBCO15jCss | 2937 |
nemo_curator/sample_dataframe.py | sha256=553_PeN7UlAjbcIZNDeZxmFmUJgqDB_0O86ML4SwUWA | 2835 |
nemo_curator/datasets/__init__.py | sha256=jb_GcqbWfrisj9nqglR0faq0iaMNh0fhKJ-ZTw71_7k | 683 |
nemo_curator/datasets/doc_dataset.py | sha256=KZ5XzeBGc99wFlWhCftUbf6CK7l5EkfqGCKSawuCiS8 | 7545 |
nemo_curator/download/__init__.py | sha256=9lBlOthqw-Qd1P93Hn1VmjAfM57PStMfQ8teCf-V-Sk | 1887 |
nemo_curator/download/arxiv.py | sha256=3KaaccswHti-ZCQ2ev1FxCB0jf58xTCY70BX_L3Sw5g | 16228 |
nemo_curator/download/commoncrawl.py | sha256=r6HIH0XQ9IZssSGoKXCTV9Llef0T6gFL_H3y-5WGDtc | 15562 |
nemo_curator/download/doc_builder.py | sha256=WRYD-vq9lDwhuOyv1o17oqcYkMafFfhhKTBs4HdrkDs | 7372 |
nemo_curator/download/wikipedia.py | sha256=nVSbpmW7QhNg9eGJAA4BOAHLptJuhctllEFaRDfZWLg | 31193 |
nemo_curator/filters/__init__.py | sha256=2oFTNdkjy5pJboO23HpqhiDdjf2gTHcq-jIsbmeDZM4 | 2525 |
nemo_curator/filters/classifier_filter.py | sha256=La51m4_HfUTW4vtXku6AtirAzAuqdsxpkedsqpXGkWE | 3436 |
nemo_curator/filters/code.py | sha256=Fx-8hHHL85VxsgcKZ2eX7Cai-5TCqgLOzfnH-_Hc7wE | 10712 |
nemo_curator/filters/doc_filter.py | sha256=9T2yC0F1-g7B9m73_BpPVrqZXH3oSVePQdcoWC5Uak4 | 2030 |
nemo_curator/filters/heuristic_filter.py | sha256=bBBxfVALXBdZGmUGCNjPl_xROSybYNzL-Ct7nLHHmd8 | 19992 |
nemo_curator/modifiers/__init__.py | sha256=S_fHYgbQOezDHVA65kV-AR2HMzPn9-G482Rc-_vqNN0 | 976 |
nemo_curator/modifiers/c4.py | sha256=U25SZoeVOcbMRJ4kRsb8JsI8S6IQv1bBt--Q3MfdqLk | 3273 |
nemo_curator/modifiers/doc_modifier.py | sha256=i6lDpiCybvQP_u6ea2-dC1X8Ym2CTRImPmtQBlxA5HU | 936 |
nemo_curator/modifiers/fasttext.py | sha256=aUg7jTcS1OvG_BKG1PUexzc0JcxEF4jDgeGoy0yx8XI | 938 |
nemo_curator/modifiers/pii_modifier.py | sha256=zZbsc3BkZjSuw-x3VMYUci7CSmdQ2YPjIC_z-fMWeck | 3728 |
nemo_curator/modifiers/unicode_reformatter.py | sha256=nz-vfzMqcOHpoCY6KSkwQ1nLmhA1UxOBGb0B7wgeYHE | 846 |
nemo_curator/modules/__init__.py | sha256=anecAKbLHiZ7jpxRhFOw2Dm5jIXlMyg-TLrcyE84JnQ | 2690 |
nemo_curator/modules/add_id.py | sha256=3D5_xKM4LwCqPieAD8dE2h65GuvOG2z55MBs-a6W_Pw | 3341 |
nemo_curator/modules/config.py | sha256=T4TRqE7JvUSagy5iVgFTpLgX2XEgXNm2S7JWKc1iQig | 6546 |
nemo_curator/modules/dataset_ops.py | sha256=IMUPEE2kCQZduqPyozQwQvLSqA93JULjoOVC7XkMAro | 7170 |
nemo_curator/modules/distributed_data_classifier.py | sha256=FFoniNNy3reqCq4LhobAtVDbHJuTOksPb_TVSCP8Sqs | 11799 |
nemo_curator/modules/exact_dedup.py | sha256=JpDbBsdlvgZ09iPfwjpUFhhXqNeLVw4K0uSAWZJYdLM | 6406 |
nemo_curator/modules/filter.py | sha256=3cp1XK8aN-g7bGXIRI4dWyLU3IvcCwOnp7YyHL7Os6M | 4529 |
nemo_curator/modules/fuzzy_dedup.py | sha256=LBnqP-8HO3l3hQ6suBgurucXIbezh_FBP4qe8e36yYE | 57083 |
nemo_curator/modules/meta.py | sha256=9q-4cd1pZQm4T5D-U_oydoBtavJDxR_ssAZy32KS0pI | 825 |
nemo_curator/modules/modify.py | sha256=-vuGZy_hhIN963Eq3aIhhCVbHOYOV8nle-Ik8VGS8CQ | 1406 |
nemo_curator/modules/semantic_dedup.py | sha256=ope8iYUR9D3ECVPxsDgv3otNfl9AP8mznht8nGXVfqQ | 22148 |
nemo_curator/modules/task.py | sha256=4xpnykbH9qQiyODjwVMlR8GlT6xCEAgk9tWBtPv3OCo | 18766 |
nemo_curator/nemo_run/__init__.py | sha256=SsipNxmMuUKT-P22M7z3i2K10gSybbAuHklWQgxl9Zs | 675 |
nemo_curator/nemo_run/slurm.py | sha256=WySsj9PYVhnMJZAt_4kOqPLyRsnr0JYnizSAL6d4oms | 5298 |
nemo_curator/pii/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/pii/algorithm.py | sha256=vQyyewAniVQ0xpCPm61IdjQwfZWm9MLcs7etuT96ncI | 9843 |
nemo_curator/pii/constants.py | sha256=FhrU0UmUUHE6LIxDNAP4WQuY3Dl48d0bdBwr5sM-TVg | 369 |
nemo_curator/pii/custom_batch_analyzer_engine.py | sha256=bc5EmJ5BZs07C6Y2Z-rlX6FagC1R29hjxDyvisrbsFY | 6668 |
nemo_curator/pii/custom_nlp_engine.py | sha256=1ZPR8gxCskO7pajgSFgeOz_eAERJjNV12V1Oqqz9rJ8 | 2686 |
nemo_curator/pii/recognizers/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/pii/recognizers/address_recognizer.py | sha256=Pc_lSuT-J5asHQQ9So5GW-2yoSDoIqoQzSsjtkyg9uY | 1828 |
nemo_curator/scripts/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/scripts/add_id.py | sha256=vVXRVjEq54-o4eCKbIMnOAvAj8oieGDWLKYvOZpStT8 | 3969 |
nemo_curator/scripts/blend_datasets.py | sha256=PYdps-cilBzHPkIMV5HYZCqVpJ2NYwEhoiOttCkV89w | 3715 |
nemo_curator/scripts/domain_classifier_inference.py | sha256=dneASqLn-ghTo7D2tfWueZTbItgkinyv5dqhIQASfW0 | 4073 |
nemo_curator/scripts/download_and_extract.py | sha256=8vCS5wL9cvpYG_jzucAEBUL0XY77ru7qG64WvgBkDD0 | 5814 |
nemo_curator/scripts/filter_documents.py | sha256=r4rD8pl4ySO7XHFAbgfGb2r0ivv-OB0HdnGJPbaUv1c | 10984 |
nemo_curator/scripts/find_exact_duplicates.py | sha256=78XGrNJB3TaEvBhH169HyyiRI6VIQ4b_NbY1MIicKXQ | 4000 |
nemo_curator/scripts/find_matching_ngrams.py | sha256=jPpnh_vHFeG7sPXwUNw2WL2oyBugaKJ7wBzuZZaxTi8 | 3335 |
nemo_curator/scripts/find_pii_and_deidentify.py | sha256=YzA7kog983oJanNaKWeERcDlPAaZO039QY2VzCU0PUM | 5536 |
nemo_curator/scripts/get_common_crawl_urls.py | sha256=jZVTY2GJXipO-WqiTYP830AzuecNtjvRnsAEKesZV0g | 3479 |
nemo_curator/scripts/get_wikipedia_urls.py | sha256=VcBW9tcmVxRHx7ijWFPf_2kmWF6LLkdjZFhLSHA_zwI | 1862 |
nemo_curator/scripts/make_data_shards.py | sha256=QFykRSrrur3KjziOnxkbDX9pppJ06qyi0riLQiqKFmw | 2589 |
nemo_curator/scripts/prepare_fasttext_training_data.py | sha256=-H7gHQnojdRc1EdxCt2NKJRn2giNi533pMujOli8lhA | 3521 |
nemo_curator/scripts/prepare_task_data.py | sha256=9RzN261e1x4b3L7tORJF8maAJh2ZW4hXJJjQkZNQMjs | 3034 |
nemo_curator/scripts/quality_classifier_inference.py | sha256=HxhoPlAUJGa64a7-8KMKD15eMd44WUhDOw5cp1EtKgA | 3887 |
nemo_curator/scripts/remove_matching_ngrams.py | sha256=GEXccxDhUxy2vvjkqO3IF5Q8wW99g4Kn9TqyzKJKwtE | 5306 |
nemo_curator/scripts/separate_by_metadata.py | sha256=1e9NAYOkgidEXoLXb5u7DOyrS2ayExDEHaHtADbuzKc | 3732 |
nemo_curator/scripts/text_cleaning.py | sha256=eha9Gztu3wx3E0QJgRB_2XMoaFbdu8sOJOuQOLeJpDQ | 2980 |
nemo_curator/scripts/train_fasttext.py | sha256=bTPRQVxx2xQ37NH_wci6Kw_JtZvIX5JrZemV7Qbqzmg | 6420 |
nemo_curator/scripts/verify_classification_results.py | sha256=dQ1PfqDnOuh_Hrb8XXqkiLL7KWJH3ghVU3PtiWaii-I | 6615 |
nemo_curator/scripts/fuzzy_deduplication/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/scripts/fuzzy_deduplication/compute_minhashes.py | sha256=jpCFxnmNJ68x9-n4v991EXNPLfBEfjtty6-ERP6Sj4c | 5340 |
nemo_curator/scripts/fuzzy_deduplication/connected_components.py | sha256=f1V605Cp-Py5JBz8yQTLQWp8F_u4H20lTFvQZqyWjF0 | 2769 |
nemo_curator/scripts/fuzzy_deduplication/jaccard_compute.py | sha256=9nhJ3JBrZQ96mIlvYlS_TNYLz2kTIam98hh3ziBpZuQ | 2779 |
nemo_curator/scripts/fuzzy_deduplication/jaccard_shuffle.py | sha256=3_c0O7YceShfY_kAsFAKctaW3TNbecKYjXZPhIxRpJE | 4107 |
nemo_curator/scripts/fuzzy_deduplication/map_buckets.py | sha256=vxRnUfaQyFk42y9fjP8dgoAMqN9JHuL5Y5ZAuv2TEHs | 5716 |
nemo_curator/scripts/fuzzy_deduplication/minhash_lsh.py | sha256=HthsBEILImY_Gm4J1c_5RhgXlsC_RQb2MZ4egmn2Xmg | 3875 |
nemo_curator/scripts/semdedup/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
nemo_curator/scripts/semdedup/clustering.py | sha256=Lo3puipI8XBbZy2AYtqsJD5QrCsw8udUApK9Dby-yEc | 3857 |
nemo_curator/scripts/semdedup/compute_embeddings.py | sha256=kPz-7TmA-02sTe5UJkWgo7TMmP9BAZlCoBrEuk5536k | 5084 |
nemo_curator/scripts/semdedup/extract_dedup_data.py | sha256=WKR8xnd036b8qPys07UI-8NRIBBmQV0nu3jp3Gn9XdY | 3288 |
nemo_curator/services/__init__.py | sha256=B3D0AylSmuOOruKFvG_vqV78f9OuR52mDGTWa4npDoQ | 973 |
nemo_curator/services/conversation_formatter.py | sha256=JRMOpMCMrCz2nN4BwHvcHigi_xJTrw6lo6MTlzGN2kQ | 1022 |
nemo_curator/services/model_client.py | sha256=1OEgNbF9vgeso9d64G4cKiGL9ZfxU5Csue0hOOHwi_0 | 2994 |
nemo_curator/services/nemo_client.py | sha256=uVN8xhRVsKr7iD20xJ8j-ZfHWzlsOUSnIkvhf1xF0v4 | 3626 |
nemo_curator/services/openai_client.py | sha256=z4Kqvs1ItyvhnbPJJB-Idpvys6iNUyr5ai_IayjwQHw | 6035 |
nemo_curator/synthetic/__init__.py | sha256=EskbOiR6d4VCMNgR50IIo-mgjc4SDnaQ2ru-4x7iw3E | 3072 |
nemo_curator/synthetic/async_nemotron.py | sha256=Meg3F9-gTt3q0_n59dQoVMIJVsbhE2xdTDSe7sflroo | 80632 |
nemo_curator/synthetic/error.py | sha256=EWGJNnhanzilxsUrlMKOgcv2fJ_9YjunOFIpLpteOzQ | 803 |
nemo_curator/synthetic/mixtral.py | sha256=CUKR-bmD3qYayxtK08987A7AepAa5jbzvGfoltxzSZ4 | 1319 |
nemo_curator/synthetic/nemotron.py | sha256=m1S03olbsjYmNcv9XF5P2xUF5AtF_wk0IshAGtIgMzk | 71704 |
nemo_curator/synthetic/no_format.py | sha256=TjJoa04f_VQTczgAYf67p5XnGB0oKATXFRQ6SXSYTAk | 1194 |
nemo_curator/synthetic/prompts.py | sha256=DjsQRoi4iqQX9FNeWFEVoKC0TAKGFurCqoEt-Gxgwyg | 8198 |
nemo_curator/tasks/__init__.py | sha256=IfaRyarhNVd7icbFyiT_fBnWoY38iCcIqZoFR019Wis | 1499 |
nemo_curator/tasks/downstream_task.py | sha256=krVCvrbuZy9ToNyImYXpxUl03hBCnPVlWcrOXWvCwPY | 1968 |
nemo_curator/tasks/metrics.py | sha256=MgBEr9mC9KEeDRfmW32_k4a06f6ljWveyC9LBLtAG0Y | 18930 |
nemo_curator/utils/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/utils/config_utils.py | sha256=0_Uy16dEXqqTeqiQsBCRzD8E_wzMYOYvz0_jkC-ZGhU | 3598 |
nemo_curator/utils/constants.py | sha256=Szr4RTF8hFFkTqz5dhh_IbTReZfBXFLlhyokggQ_bvI | 3296 |
nemo_curator/utils/decorators.py | sha256=t0gGx4HhyEJntCk5XAAaoePmoahSyTNjhqtEpGeeHrY | 864 |
nemo_curator/utils/distributed_utils.py | sha256=hORyJQ2CMfLrwIs3NHLTiFeacs2HgfSfSJSWvzhfZhs | 21557 |
nemo_curator/utils/download_utils.py | sha256=i97IsKFyOCWrZa6JuZh84ugabIO9LHkKxIp7fzAR9ic | 7458 |
nemo_curator/utils/file_utils.py | sha256=vcaNOC39I-3cv5sPiwIQkDiSblWND9x1hbi3pp8jr9g | 8541 |
nemo_curator/utils/gpu_utils.py | sha256=3hZWSm_aQdxyIyOjf2vsuHo3QT9qzKNh63ZPzvuTohE | 1113 |
nemo_curator/utils/import_utils.py | sha256=9-nRqRrjYAfdi68avUB0QHuU5VZ5KPvc2J2Ws0D-jus | 11869 |
nemo_curator/utils/module_utils.py | sha256=aaTOspYbMLpGby-gd-wcVKhVv_eBbtYpPd4mjMiRgSM | 779 |
nemo_curator/utils/script_utils.py | sha256=nHSwTBH55SO9MTcftGDjOYMTPririXwcvzzxNCQeOaU | 18123 |
nemo_curator/utils/semdedup_utils.py | sha256=aWnn5ZX9Nkk4G1f9nIHKjRgjdAPxGZ_QOfkvGtxq7ZY | 15323 |
nemo_curator/utils/text_utils.py | sha256=OMFateEXEDtlsIrZov7J_6CXYI00M0AAfXs_m-wxiM0 | 5920 |
nemo_curator/utils/fuzzy_dedup_utils/__init__.py | sha256=NVzEmeQhr33NGGaBYs_GTg-TgkVqEAgU8wYE_BWXMLQ | 610 |
nemo_curator/utils/fuzzy_dedup_utils/id_mapping.py | sha256=05MwMpEDBycgB9rESP5yIZSbZDqAW4tb3t2zJLKSEtM | 1862 |
nemo_curator/utils/fuzzy_dedup_utils/io_utils.py | sha256=SZddQUZWUI47b_oRNfhy3myBH1zEHitJ5JaU6qetNbc | 6292 |
nemo_curator/utils/fuzzy_dedup_utils/merge_utils.py | sha256=PQpt26iyJQhBiBdjA1n3fVMM-HYxJrU1YeTGucInxUA | 7800 |
nemo_curator/utils/fuzzy_dedup_utils/output_map_utils.py | sha256=XzhhT-dP885ZQW5Y37jCvMz4D0DKS_Lkg7V3UqQaoEg | 2588 |
nemo_curator/utils/fuzzy_dedup_utils/shuffle_utils.py | sha256=F-FIwICEVM6Lmd90oNw20XaQCbdWXGzQAISQ0DUZsXc | 5219 |
tests/__init__.py | sha256=Gl7X71vLknbUhnkfFuOWJvs_by-CkQjqERM3FQp24Uo | 1253 |
tests/test_add_id.py | sha256=54wB9j5C79YGEuoMUBNW4e4gHHAwXNLJr4fET1-IpDk | 4987 |
tests/test_blend_datasets.py | sha256=X4Xu9jTroU0dbu4WFkvHeVqePxj_ncASJZfLpIulyWA | 3878 |
tests/test_config.py | sha256=5sx7G-ATi0JJ35UeLWWlOLIFHibuvpb3trpMC40GV5o | 2716 |
tests/test_dataset.py | sha256=eD01HC-wwoCqLP4zGNn1N__nv-NRqIAd7ey9HdBGD0I | 858 |
tests/test_download.py | sha256=Yirg1czyXKAahBSyEeoQ_IPR23EzQfq_KzmX8wr_c_w | 6426 |
tests/test_exact_dedup.py | sha256=hBLdCU7qBUxaePygjtT4QhsxK5Pcyf8x5b6mIBr3U_Y | 1900 |
tests/test_filters.py | sha256=R-CTSCsFx8ZZlFLbGcv9K_wjT2ilgBfVwk_cnq1R2og | 30378 |
tests/test_fuzzy_dedup.py | sha256=G8uq_6xfjFt4BebhuEhGPqw7YsMtoHMp1bsmiIhyRBs | 16424 |
tests/test_io.py | sha256=szxF9ODvj390Hc800CQpW1MUhOMV7sHU328vog8a5W0 | 7819 |
tests/test_pii_accuracy.py | sha256=vsjKth8M73To0UkMJvhR0WpE6jsZLMZH4zsyXrHa9y0 | 6376 |
tests/test_semdedup.py | sha256=mzX_95_xBa6TfpXGsbnEJMoQokehjA_nCi0TD-HwB6Y | 2800 |
tests/test_seperate_by_metadata.py | sha256=FGXzOXXDys7zXzyXkY2JuwMlV0Jp8fs26gOzi6IcfuA | 2566 |
tests/test_shuffle.py | sha256=JaX3Psby4bEIe0-BNOCDvnrHP9rnXk2EbC-MY5x6hQo | 7389 |
tests/test_task_decontamination.py | sha256=RFj0x7rkl_pQEGf3RI4KVB4T4gADjX4Xf3FOBHJnCyA | 12881 |
tests/test_unicode_reformatter.py | sha256=iALTzl-G1ggy9ZjiDWqhw9ub1x3DqZCQro4S1UVR0YA | 2082 |
invisible_unicorn-0.4.0.dist-info/LICENSE | sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ | 11357 |
invisible_unicorn-0.4.0.dist-info/METADATA | sha256=EpfeHDk9n_QF3rW6BnlohOWHUcBgsaRwKOeXR_Bty94 | 14197 |
invisible_unicorn-0.4.0.dist-info/WHEEL | sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ | 91 |
invisible_unicorn-0.4.0.dist-info/entry_points.txt | sha256=H-9bbcmtomXmT-SDGyevoyaCYrRPCGiTyZCeobb2BnY | 2410 |
invisible_unicorn-0.4.0.dist-info/top_level.txt | sha256=NZpMgId9Qc8gKXBh5ITLq99W4VsNI3MN1cG7f0hm_n0 | 19 |
invisible_unicorn-0.4.0.dist-info/RECORD | — | — |
top_level.txt
nemo_curator
tests
entry_points.txt
add_id = nemo_curator.scripts.add_id:console_script
blend_datasets = nemo_curator.scripts.blend_datasets:console_script
deidentify = nemo_curator.scripts.find_pii_and_deidentify:console_script
domain_classifier_inference = nemo_curator.scripts.domain_classifier_inference:console_script
download_and_extract = nemo_curator.scripts.download_and_extract:console_script
filter_documents = nemo_curator.scripts.filter_documents:console_script
find_matching_ngrams = nemo_curator.scripts.find_matching_ngrams:console_script
get_common_crawl_urls = nemo_curator.scripts.get_common_crawl_urls:console_script
get_metadata_from_corpus = nemo_curator.get_metadata_from_corpus:console_script
get_wikipedia_urls = nemo_curator.scripts.get_wikipedia_urls:console_script
gpu_compute_minhashes = nemo_curator.scripts.fuzzy_deduplication.compute_minhashes:console_script
gpu_connected_component = nemo_curator.scripts.fuzzy_deduplication.connected_components:console_script
gpu_exact_dups = nemo_curator.scripts.find_exact_duplicates:console_script
jaccard_compute = nemo_curator.scripts.fuzzy_deduplication.jaccard_compute:console_script
jaccard_map_buckets = nemo_curator.scripts.fuzzy_deduplication.map_buckets:console_script
jaccard_shuffle = nemo_curator.scripts.fuzzy_deduplication.jaccard_shuffle:console_script
make_data_shards = nemo_curator.scripts.make_data_shards:console_script
minhash_buckets = nemo_curator.scripts.fuzzy_deduplication.minhash_lsh:console_script
prepare_fasttext_training_data = nemo_curator.scripts.prepare_fasttext_training_data:console_script
prepare_task_data = nemo_curator.scripts.prepare_task_data:console_script
quality_classifier_inference = nemo_curator.scripts.quality_classifier_inference:console_script
remove_matching_ngrams = nemo_curator.scripts.remove_matching_ngrams:console_script
semdedup_clustering = nemo_curator.scripts.semdedup.clustering:console_script
semdedup_extract_dedup_ids = nemo_curator.scripts.semdedup.extract_dedup_data:console_script
semdedup_extract_embeddings = nemo_curator.scripts.semdedup.compute_embeddings:console_script
separate_by_metadata = nemo_curator.scripts.separate_by_metadata:console_script
text_cleaning = nemo_curator.scripts.text_cleaning:console_script
train_fasttext = nemo_curator.scripts.train_fasttext:console_script
verify_classification_results = nemo_curator.scripts.verify_classification_results:console_script