goose3

View on PyPIReverse Dependencies (7)

3.1.19 goose3-3.1.19-py3-none-any.whl

Wheel Details

Project: goose3
Version: 3.1.19
Filename: goose3-3.1.19-py3-none-any.whl
Download: [link]
Size: 113435
MD5: 35918f79ed76be88cbc632fcea7cfbe6
SHA256: 65744ae4cce205314b28026179d69b05aaa9eee4259b5ec0732951e1ab5c5c8c
Uploaded: 2024-01-19 16:22:10 +0000

dist-info

METADATA

Metadata-Version: 2.1
Name: goose3
Version: 3.1.19
Summary: Html Content / Article Extractor, web scrapping for Python3
Author: Mahmoud Lababidi
Author-Email: lababidi+py[at]gmail.com
Home-Page: https://github.com/goose3/goose3
License: Apache
Keywords: python,scrapping,extractor,web scrapping,nlp
Classifier: Development Status :: 4 - Beta
Classifier: Environment :: Other Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: MacOS :: MacOS X
Classifier: Operating System :: POSIX
Classifier: Operating System :: Microsoft :: Windows
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Internet
Classifier: Topic :: Utilities
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.7
Requires-Dist: requests
Requires-Dist: Pillow
Requires-Dist: lxml
Requires-Dist: cssselect
Requires-Dist: beautifulsoup4
Requires-Dist: python-dateutil
Requires-Dist: langdetect
Requires-Dist: pyahocorasick
Requires-Dist: jieba; extra == "all"
Requires-Dist: nltk; extra == "all"
Requires-Dist: nltk; extra == "arabic"
Requires-Dist: jieba; extra == "chinese"
Provides-Extra: all
Provides-Extra: arabic
Provides-Extra: chinese
Description-Content-Type: text/x-rst
License-File: LICENSE.txt
[Description omitted; length: 9551 characters]

WHEEL

Wheel-Version: 1.0
Generator: bdist_wheel (0.42.0)
Root-Is-Purelib: true
Tag: py3-none-any

RECORD

Path Digest Size
goose3/__init__.py sha256=-04SwuDmWlFVkfDG9p7i_a64uJRgrsCRGt56C2lFi6s 6047
goose3/article.py sha256=Cw3aDswteU6e7HaibTW6AHHE3qbXfN-9DSvWpBpsGss 8572
goose3/cleaners.py sha256=0zHU5v94Hvth_QhzgEMe866ZvAFqBwuPwKkw8H_QZ10 10768
goose3/configuration.py sha256=sFKu7kQ6JlZRUKnGgxxnz8MVGM3oPHq_OxW5OqVKdY8 22126
goose3/crawler.py sha256=zOt4xyz-WQNzwIgon7-RsGhyTSeCAx1sNb3dQd6dXYw 13236
goose3/exceptions.py sha256=OjaCONhNH6FM9CnczOzsJJRZemcqwLi9vKgsAGEn_8k 62
goose3/image.py sha256=7rvZhGH03UR9YVEhQHb0CuRDL4MCcw9tsa6U3XCoGpo 3689
goose3/network.py sha256=h8_9ehmonWxb4K2qLeOwIU86VChNQbvno6vCzCA-RjE 2162
goose3/outputformatters.py sha256=G9O8VW3doh1f2J9_B_a7z3dQhFpIa1icat1MpQHNYB0 5388
goose3/parsers.py sha256=hwq8odt3eEuj5Q0HOrNPZmxN4dGU5Qq-54CorJtWOfU 12227
goose3/text.py sha256=ofuwSfhkQQQ9jixc_9_T9i94qJ-y1kxaf9aAIVhzRRo 7461
goose3/version.py sha256=fesh0mv5qhxi0r8PJgtRuPvRxWAhlemfP8JYKyFS0y4 946
goose3/video.py sha256=1mdsMFjMZOhV6qsuSe6sOVPonCnEG_e0AwplLdquw_k 2156
goose3/extractors/__init__.py sha256=qZUOj3FHu-KGvLlrG6eXWy16taqoh315njn9nLFqvTE 1145
goose3/extractors/authors.py sha256=1ONhMl_Me_53XBjBI8iBO7OZdWZ7JjsO8dP4zTNKhoI 2925
goose3/extractors/content.py sha256=Hr8LFTnwd2pewTSB4cGrTEIAkaLJan9IAjQUzyZQVi8 14075
goose3/extractors/images.py sha256=1gMWeWYDaDdyxmpyI8NRA5QaKpeFMWIBUGqn032rNWg 14596
goose3/extractors/links.py sha256=wY3r6h3531fKUj8Woc0GGQd5lyyS9asfOKLdLWFq3lY 1197
goose3/extractors/metas.py sha256=D0PEZeluEo5amlZ19Ilx5l1-GPQuKyYz9MJqo0NYdck 4782
goose3/extractors/opengraph.py sha256=hCtV715KhFSmPtE3oBQPjH-cjH3cjcAVNMm0TAXXLNs 2592
goose3/extractors/publishdate.py sha256=GFMUoGsE3vUjaBscyk7yTNwrunYmg79eIz8m0pbPSP8 8528
goose3/extractors/schema.py sha256=SNSSIgWyb1hOWU5Np3kyL2HZGilSft9-PoxGm-VhPB0 2441
goose3/extractors/tags.py sha256=PwjwEWufn7tfquv3GVCsRJLWYVHmNZ84f2eO8C_ro-Y 1608
goose3/extractors/title.py sha256=wzKH960SRRJ6COzyAjNoD93Cvi8tCsxtobNBQ6XJFsI 4247
goose3/extractors/tweets.py sha256=5v96l4IRxZBincTPd2L2tKNgGDj9w8O1QvjYPKo-gAM 1328
goose3/extractors/videos.py sha256=DfCUFwzfM-30clTfm51kA2ov4sVTCn6lAxymZlrmdX8 4448
goose3/resources/images/known-image-css.txt sha256=395m1E0gmnR964NRKCp_U80nNthO8SkkaT5IRCcx1xg 232
goose3/resources/text/stopwords-ar.txt sha256=MI4IgpiJ_9wioBXoWCalGyeStFkuMlEwD9wSxnRIbgE 1452
goose3/resources/text/stopwords-be.txt sha256=Sa08G0yBPKJhz2Om1kQ1BQCMc9XvPWQv5enRH1kC858 936
goose3/resources/text/stopwords-bg.txt sha256=eiIwYk1TU8YcYYPbMPjUzZSZlgd7gl5o7d0LIthzqHQ 2409
goose3/resources/text/stopwords-da.txt sha256=A1tQ6LutIdwsN09YSqZM33DcWFbX6-RndIA29lSEW7M 484
goose3/resources/text/stopwords-de.txt sha256=tl02XGxiNYyDhaAnvHyf-vI7UJlS2RuJNwW_Jfg3RCM 5968
goose3/resources/text/stopwords-el.txt sha256=MrNPZccmGguHmadO9WC762dhS2uGJnhBj8kdQoQ-jvU 13903
goose3/resources/text/stopwords-en.txt sha256=taCMo75agEB4ZWZjws5J6Q6kVD9Z6Bmkm-cMGKYpOBU 3585
goose3/resources/text/stopwords-es.txt sha256=g1uQlrf5_Sk_3oyzxPkA8p1gWOYB-5LvSmoiE91yHMI 2185
goose3/resources/text/stopwords-et.txt sha256=TQBb3Q388dQ_ZVRjlPQqnrfONORKIC2cBB7UM3mYGa8 189
goose3/resources/text/stopwords-fa.txt sha256=lrNxTp0sv0QKXpgnyEV8xyflxabJQZGdzfSnmzICPsM 7885
goose3/resources/text/stopwords-fi.txt sha256=NH7nDTJ5u-hAIKGSe-NzAbXkb-No5uDSqvY3UND_fvo 464
goose3/resources/text/stopwords-fr.txt sha256=_4UmVTc-MYOxqz4s5N7s_ibhtPQWNnQyJAOF4nm82Fo 966
goose3/resources/text/stopwords-he.txt sha256=-GFMB-dXEfz-KUFt_jMUrua4PuMssvXNUTYO9tByUuI 1837
goose3/resources/text/stopwords-hi.txt sha256=vhZ1hkEqxjtWtxysChLpEI_zElQTzhG5lcvtvC7XaKo 2789
goose3/resources/text/stopwords-hr.txt sha256=QQpdS8w0mlmuvtj0GBWvs3FBpoqT3sbPqw3NNFrqIFM 871
goose3/resources/text/stopwords-hu.txt sha256=tzx4glpJIa0XJ44a3ggUbrs4neUdfI6dWEb57dJjg7A 2336
goose3/resources/text/stopwords-hy.txt sha256=lYA8ECIEJfy2kMn2I0OX0CmjAsOGjjouZHczW5xzfFM 297
goose3/resources/text/stopwords-id.txt sha256=F-NDRwcG5UeZ5NWSapM5qtI4wtrLUwtK_GQNE64DVWw 10500
goose3/resources/text/stopwords-it.txt sha256=ykDV7p7nQxu1R3z81FWlpe_BMgF1V6Gjgvw7mF8pYQU 1696
goose3/resources/text/stopwords-ja.txt sha256=962IY0fXtUlS0UeazgxF2RbKWNm_3b1aBPwmSyDhFFk 1007
goose3/resources/text/stopwords-ko.txt sha256=DSqMnLbnxymFA6iHoHF4VgrS49Rw0HjFuur35fZWcQ0 460
goose3/resources/text/stopwords-lt.txt sha256=eIPp3zR_feL3p_GQlp-Ll3bI93WRBO8EmuxAUNbb8so 764
goose3/resources/text/stopwords-mk.txt sha256=CKEzV8NJDb4jq3C97P7MuSd8qwFmcYjODGXwC3HxS78 1504
goose3/resources/text/stopwords-nb.txt sha256=VuZbxq0aq66b4MkwezWQqtdPMxGOs8kO7IF2NSqPTSk 587
goose3/resources/text/stopwords-nl.txt sha256=GfMWt-rO7i3IcHRCsPvZUXARVNkor3_dZrz0Tqadrkk 177
goose3/resources/text/stopwords-no.txt sha256=9hp0ky1DpC463zMMTE6Se9DHuFGXRPUV6injyjlJngQ 514
goose3/resources/text/stopwords-pl.txt sha256=z1A4NOX5ZxFrWzUcBMKd92_EB4XgmEnWE78br2GykhU 2016
goose3/resources/text/stopwords-pt.txt sha256=I1xLWCygpgkd9ZZ8hYSjQ7jr4shuj-3ySSf2hkddwsc 3610
goose3/resources/text/stopwords-ro.txt sha256=E4AZ5e-RhGmKcJxxzCfkUW4SKeRUUaYAWmqVFVgtjgU 1916
goose3/resources/text/stopwords-ru.txt sha256=soQOPcfR18HOcSoZWzWFkajvSrLG9pj-A4yP1pmrneo 4958
goose3/resources/text/stopwords-sl.txt sha256=H0K15wVGRZn43AeLZsRuF1p0NZvx30mL4o9yGM_RuoM 2436
goose3/resources/text/stopwords-sr.txt sha256=X1-W65P7NvABWA8VtINg974KbJn4P1bYhn9CztZ6NWk 2122
goose3/resources/text/stopwords-sv.txt sha256=3zPGM3lwfP6mCrkbqSLP06zqppSucA8OLWyXk82y1CM 3796
goose3/resources/text/stopwords-sw.txt sha256=pHuLPf47kDGCjRsImLD-ush8c2nafOci1TID2GAzCxM 407
goose3/resources/text/stopwords-ta.txt sha256=Cky3uOegcUZbchWLRRe25XzcaYIL4Tg0kV5_kewRBpg 1964
goose3/resources/text/stopwords-te.txt sha256=oJmyYsrwItUH2d5cT4loPVykuj-vMktQEW7YYcDpD5o 901
goose3/resources/text/stopwords-th.txt sha256=ZzOqQCVYz6sCTRuY7OaJ5eMbZiF0vFFZRmTpCLGwVtI 1420
goose3/resources/text/stopwords-tr.txt sha256=CQLIhb35bsYDvcLa-UpXNI3FuzvvQRGI6VqiGC7fe5k 1368
goose3/resources/text/stopwords-uk.txt sha256=jnA0kjaFBpFCu5C4mCxtLYDMvN7-ds-G-Y-NPK955_g 4030
goose3/resources/text/stopwords-ur.txt sha256=eDTyXVnyMQ4Ja90XAyArSfpRUF18IDydWFoToDhAA64 4359
goose3/resources/text/stopwords-vi.txt sha256=038u06SJb4rzOIPZAgCkSzvK8t9LH8_clDWluHUEJVo 724
goose3/resources/text/stopwords-zh.txt sha256=H40b0HSxc1xjjSZ3k9HkHIXYs8GA5HFzV3-zG9C9vio 625
goose3/utils/__init__.py sha256=DmdEniclZPNeSokLsXEZxl282efb4uAt_kKNou02kxw 4883
goose3/utils/constants.py sha256=4SYzl789JAOnRwAK_TxCg_UXwYb6iGu-jMS6ewe67QY 59
goose3/utils/encoding.py sha256=NykTlHX2Jeo27Mn-EsiA4hrMPBmBVlbenqOA8NrcWGY 4617
goose3/utils/images.py sha256=LT9JB1F1QriF2wVD6hc00Tmo6YZDlFav37_ZL5FcxzU 4525
goose3-3.1.19.dist-info/LICENSE.txt sha256=MgPtWq4KgZZomM9YDUuEDWMwg1z2O7GbaCZohrxXIug 10850
goose3-3.1.19.dist-info/METADATA sha256=OVJJPjGaOrJ4MwlBcBzfUpQF4WxxBplIj9q2W-gX3as 11817
goose3-3.1.19.dist-info/WHEEL sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM 92
goose3-3.1.19.dist-info/top_level.txt sha256=eLFFFniIuULP4NXUJO6PH9745_AIBOoKNQBmT9pTpyE 7
goose3-3.1.19.dist-info/RECORD

top_level.txt

goose3