krawl

View on PyPI | Reverse Dependencies (0)

0.0.6 krawl-0.0.6-py3-none-any.whl

Wheel Details

Project: krawl
Version: 0.0.6
Filename: krawl-0.0.6-py3-none-any.whl
Download: [link]
Size: 36289
MD5: b22a0ee9f1ad3a8d884e61c8ed10005a
SHA256: a6fe5eca812a3f0919ce2c3d24185e7b77df2445ef3dfee68bb639fb5ad09ed8
Uploaded: 2024-10-20 17:36:46 +0000

dist-info

METADATA

Metadata-Version: 2.1
Name: krawl
Version: 0.0.6
Summary: Krawl: A collection of crawlers
Author: Kun Lu
Author-Email: hi[at]shugic.com
License: BSD
Classifier: License :: Other/Proprietary License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.9,<4.0
Requires-Dist: aiohttp[speedups] (<4.0.0,>=3.10.8)
Requires-Dist: beautifulsoup4 (<5.0.0,>=4.12.2)
Requires-Dist: duckduckgo-search (<7.0.0,>=6.2.11)
Requires-Dist: fire (<0.6.0,>=0.5.0)
Requires-Dist: jsonlines (<5.0.0,>=4.0.0)
Requires-Dist: jsonpickle (<4.0.0,>=3.0.2)
Requires-Dist: numpy (<2.0.0,>=1.25.0); python_version >= "3.9" and python_version < "3.13"
Requires-Dist: pandas (<3.0.0,>=2.1.0)
Requires-Dist: pydantic (<3.0.0,>=2.3.0)
Requires-Dist: pyyaml (<7.0.0,>=6.0.1)
Requires-Dist: requests (<3.0.0,>=2.31.0)
Requires-Dist: retrying (<2.0.0,>=1.3.4)
Requires-Dist: scikit-learn (<2.0.0,>=1.3.2)
Requires-Dist: selenium (<5.0.0,>=4.15.2)
Requires-Dist: tldextract (<6.0.0,>=5.1.2)
Requires-Dist: webdriver-manager (<5.0.0,>=4.0.1)
Description-Content-Type: text/markdown
[Description omitted; length: 2740 characters]

WHEEL

Wheel-Version: 1.0
Generator: poetry-core 1.9.0
Root-Is-Purelib: true
Tag: py3-none-any

RECORD

Path Digest Size
krawl/__init__.py sha256=vtgPn6xqs6fxoevzzUoKmvV3MauWsObwH9mdSlFkOQw 150
krawl/api/pagecontent.py sha256=DQiLChwvO3QG8lLdUDW8Tqz7f_ILzcnDXExMU--_L6c 3430
krawl/api/search.py sha256=gbRi0YBPDVOQSsB27caC_FJBzaMvrn8bapQFtBj282U 825
krawl/common/__init__.py sha256=-w1f3S49-2Xj3tpzN0j45u3xhC0Wy6NPbpeji33pqV0 83
krawl/common/config/globals.py sha256=aPYft76CvvFQE0ALfm5ZLBKCSvXXA5ugcwyGZySZ25s 285
krawl/common/data/dummies/no_title.html sha256=l7FITmjIaQkdwt6qHg6-bjUB4nmpfZEKIvItqzlvZx8 17239
krawl/common/data/dummies/tiny.html sha256=hwkoSuRpAfCz4JQAZGTRTi2pUWxHZsm-LSbCLbmQ0-Y 204
krawl/common/decorators/basics.py sha256=CzvZipd5d0z6b3Erf8GZZ9zRkLgpK2sNVoSXsrbGu5U 217
krawl/common/flow/__init__.py sha256=tVZp3Ob5xLJpf7TZR_0O8RUZeTkSls7vNl6J5D4DeJk 72
krawl/common/flow/base_pipeline.py sha256=lEGMLGvlxf_7uQMXF_yAAop4RnjzM1_rGZ9QlYMjVuE 1087
krawl/common/recognizers/__init__.py sha256=55sx8h_j2eZHA1W1_2Ipu-Quzm3rR_jdFaJ4irgFYGg 265
krawl/common/recognizers/base_recognizer.py sha256=eGKNTOFlsJTQfgD-OHRHD1BFpHvPaZWOrJkqqBS3wA0 196
krawl/common/recognizers/content_recognizer.py sha256=PwQtKLzXJLwEGW58Dxv3526kwmpnKg44_8__qEkCoPs 2687
krawl/common/recognizers/soup_extractor.py sha256=q6S5kuF7FQj2uayZVxFRBhRR5P76tNMc673cVhr3V1k 4549
krawl/common/schema/dtypes.py sha256=vs2Bo9QgXyUIFSHPKmF2vmuODjlwW8cOy1AbFksxhsM 1266
krawl/common/soup_utils/__init__.py sha256=gK2jASDSH7Qor9ZT9UfeqMeMceuB-ZPbwmqi4-AGAgQ 247
krawl/common/soup_utils/context.py sha256=R5be7ejO1b9POHH1Rz21T7fse-evdGjPTBObo80WzpU 696
krawl/common/soup_utils/page_reader.py sha256=NuOzh9fOelkQ_9JJGGOR8ptFQ-PKjtrcHoQ6-naq2Nw 2927
krawl/common/soup_utils/texttools.py sha256=2vuclcSyVIgod5Dr3Uh425DKPUik-ZaRImAaJvUBC-8 330
krawl/common/soup_utils/urlcheck.py sha256=HHq0kl9ZSEJ1aIhqQ-LwUUktsn3t38YhbhMHA7wzKJs 1630
krawl/common/textware/__init__.py sha256=oxdLFF1LLwpj_ZlCkvw_HN025A3XQp8zrPQ8CzSW27k 67
krawl/common/textware/textfilter.py sha256=moCNXqgf3DXMTKUDzZi_k-jGAPZwgEseQjbrQ-a6z5k 2868
krawl/common/url_utils/__init__.py sha256=yJQufcpsy5KQg1w2M_k84P9tNDFoHlT4ks9no01PXTg 857
krawl/expert_crawler/__init__.py sha256=L0D8grDETmJodtbw2F6u6Rkxspb_cRAV08iw1Lnk3zg 155
krawl/expert_crawler/company_crawler/__init__.py sha256=B-X6obAhaDlXlaoMJP3KxoVT-LAmaHe5wAYAMyAM80s 98
krawl/expert_crawler/company_crawler/crawler/homepage_crawler.py sha256=MHRwE01pF8N_4u3TgkxVK67lA0_Ch-7THx-opo-aZ_o 1806
krawl/expert_crawler/company_crawler/ml_models/dataset.py sha256=p_jm86QKY3yh2exI2game0Ex8up33sWj9Rbr0UI9uEY 264
krawl/expert_crawler/company_crawler/ml_models/dtypes.py sha256=MOfqpKsmzkDvlgTp29yWWOMeLngbsJ32HSfbnHEo0C0 112
krawl/expert_crawler/company_crawler/ml_models/eval.py sha256=ZytP-n3pbEXKUbkzGYiaUh5LCLt22ldYW6tQuTOhVUM 322
krawl/expert_crawler/company_crawler/ml_models/predict.py sha256=nel_wskEK80vkBsutuO1-0FKk9Kq8CVXLeSQefJRcB8 939
krawl/expert_crawler/company_crawler/ml_models/train.py sha256=vIHxySHbZP5IU24g0W56eUf_qU1iG45h9I1PHfTaq4c 464
krawl/expert_crawler/company_crawler/ml_models/train_not_using_pd.py sha256=iOOrZCEJytYb8sxU_Rz4BmDIq_ahMOKlYeFjqfHAUAI 938
krawl/expert_crawler/company_crawler/ml_models/training_data/labelled_data_classififer.py sha256=owjJSAg5bSS_SR8O_oFeiWERHpIiZFFeKGLvp5uBEoQ 300
krawl/expert_crawler/company_crawler/ml_models/workflow.py sha256=-2Drh8jX168x_bXy3otRz-QEotaQ-9wBOtSWiBOLzow 544
krawl/expert_crawler/company_crawler/predictors/__init__.py sha256=p_-DXflX50wqdauHikUwyKiHb_eSkUmbth1qx4aHe-M 59
krawl/expert_crawler/company_crawler/predictors/base_clf.py sha256=S4rq2C10ScbK_2ouqn-FRUIcn2-C-LriOLEzwY5Ihc4 873
krawl/expert_crawler/company_crawler/predictors/model_io.py sha256=yDZuxFvk3KQbO_FNRH7_FJIzbikQ-APO9YdqSA4jq-g 588
krawl/expert_crawler/company_crawler/predictors/node_predictors.py sha256=ZO0aqMHdS4fcKQVQIOA9mCwNiK_9kQpjkg7wXFJUP44 720
krawl/expert_crawler/company_crawler/recognizers/__init__.py sha256=upDWXOjQHKjeFx5CahomW9S8nVNY4tvAlQ2aevAZfaY 282
krawl/expert_crawler/company_crawler/recognizers/title_recognizer.py sha256=fE5jnuqTjH-jk8VKVagy5BmINPgayk_egNGZgxATnUI 1899
krawl/expert_crawler/company_crawler/schema/dtypes.py sha256=R34YObp-LUAwzO891nla13xHorGVQrqVCNubUOyoTAA 111
krawl/expert_crawler/generic_crawler/__init__.py sha256=RWGKZ8wCnkp4KypDKqWK1LkjXMuCSIfo8cDTCJlGJTI 89
krawl/expert_crawler/generic_crawler/crawler/content_crawler.py sha256=GmCmObJiAP2BS1vngptSUr0YDjyj394zoN2o6RDc-MU 4031
krawl/expert_crawler/generic_crawler/crawler/metadata_crawler.py sha256=ojOFUdUxwryLKrgrMZLyFTg9s4wiuyqlj8P2HuzZmKA 4087
krawl/expert_crawler/wiki_crawler/wiki_cli.py sha256=8ZotHPxMn9xfsFUgCnzkgVUVZMr7lN4v1UofUHegG7c 5300
krawl/libs/models/sequence.py sha256=WpBa0KOH9lnQo3Z31Hry421AQ8s5O_IAjUZYTk6arB0 1059
krawl/libs/readers/read_url.py sha256=8DTorJZhX8o3ohQVDjBt5RD716Uy3r3C5RBLmsg3vtU 3846
krawl/libs/searchengines/__init__.py sha256=jYc8U1l9F64F5RL2OKfS4zmEhfJXvsfakJwxAkuL4Gs 123
krawl/libs/searchengines/base_engine.py sha256=hFajU05fW3fRiGJtV0hXOq_rLRhPiPngfv0Qhgc1AlI 364
krawl/libs/searchengines/duckgo.py sha256=p5ZjUOeJsshkU9sIRmTSxrarPu238g1Plhx1nJxO1pA 538
krawl-0.0.6.dist-info/METADATA sha256=KiYhKF7bLEh5Qz8lUR6Kmku-YuMQz4wXXo1ram2N1vs 4002
krawl-0.0.6.dist-info/WHEEL sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg 88
krawl-0.0.6.dist-info/RECORD