diff --git a/README.md b/README.md index c30e013d..6f68b7cf 100644 --- a/README.md +++ b/README.md @@ -198,7 +198,6 @@ class MyProxyFetcher(BaseFetcher): |---------------| ---- | -------- | ------ | ----- |------------------------------------------------| | 66代理 | ✔ | ★ | * | [地址](http://www.66ip.cn/) | [`ip66.py`](/fetcher/sources/ip66.py) | | 开心代理 | ✔ | ★ | * | [地址](http://www.kxdaili.com/) | [`kxdaili.py`](/fetcher/sources/kxdaili.py) | - | 快代理 | ✔ | ★ | * | [地址](https://www.kuaidaili.com/) | [`kuaidaili.py`](/fetcher/sources/kuaidaili.py) | | 云代理 | ✔ | ★ | * | [地址](http://www.ip3366.net/) | [`ip3366.py`](/fetcher/sources/ip3366.py) | | 小幻代理 | ✔ | ★★ | * | [地址](https://ip.ihuan.me/) | [`ihuan.py`](/fetcher/sources/ihuan.py) | diff --git a/fetcher/sources/geonode.py b/fetcher/sources/geonode.py index f974ea36..8e317104 100644 --- a/fetcher/sources/geonode.py +++ b/fetcher/sources/geonode.py @@ -20,14 +20,14 @@ class GeonodeFetcher(BaseFetcher): - """Geonode Free Proxy https://geonode.com/free-proxy-list/""" + """Geonode Free Proxy https://geonode.com/""" name = "geonode" - url = "https://geonode.com/free-proxy-list/" + url = "https://geonode.com/" def fetch(self): url = ("https://proxylist.geonode.com/api/proxy-list?" - "limit=500&page=1&sort_by=lastChecked&sort_type=desc") + "filterLastChecked=10&page=1&limit=100&sort_by=lastChecked&sort_type=desc") r = WebRequest().get(url, timeout=5, retry_time=1, verify=False) try: proxies = [] diff --git a/fetcher/sources/ihuan.py b/fetcher/sources/ihuan.py index 1d9bcf60..2981f113 100644 --- a/fetcher/sources/ihuan.py +++ b/fetcher/sources/ihuan.py @@ -12,7 +12,8 @@ """ __author__ = 'JHao' -import re +from lxml import etree +import requests from fetcher.baseFetcher import BaseFetcher from util.webRequest import WebRequest @@ -23,62 +24,20 @@ class IhuanFetcher(BaseFetcher): name = "ihuan" url = "https://ip.ihuan.me/" + enabled = True def fetch(self): - request = WebRequest() - ti_url = "https://ip.ihuan.me/ti.html" - tqdl_url = "https://ip.ihuan.me/tqdl.html" - ti_resp = request.get(ti_url, timeout=10, verify=False) - form_data = {} - if ti_resp.tree is not None: - for input_tag in ti_resp.tree.xpath("//form//input[@name]"): - name = "".join(input_tag.xpath("./@name")).strip() - value = "".join(input_tag.xpath("./@value")).strip() - if name: - form_data[name] = value - - key = form_data.get("key") - if not key: - key_match = re.search( - r'name=["\']key["\'][^>]*value=["\']([^"\']+)', ti_resp.text) - if not key_match: - key_match = re.search( - r'key["\']?\s*[:=]\s*["\']([0-9a-f]{16,})', ti_resp.text) - key = key_match.group(1) if key_match else "" - - if not key: - return - - header = { - "Origin": "https://ip.ihuan.me", - "Referer": ti_url, - } - data = form_data.copy() - data.update({ - "num": "2000", - "port": "", - "kill_port": "", - "address": "", - "kill_address": "", - "anonymity": "", - "type": "", - "post": "", - "sort": "1", - "key": key, - }) - r = request.post(tqdl_url, header=header, data=data, timeout=10, verify=False) - proxies = [] - if r.tree is not None: - for tr in r.tree.xpath("//tr"): - cells = [" ".join(td.xpath(".//text()")).strip() for td in tr.xpath("./td")] - if len(cells) >= 2: - ip_match = re.match(r'^\d{1,3}(?:\.\d{1,3}){3}$', cells[0]) - port_match = re.match(r'^\d{2,5}$', cells[1]) - if ip_match and port_match: - proxies.append("%s:%s" % (cells[0], cells[1])) - proxies.extend(self.parseProxiesFromText(r.text)) - for proxy in self.yieldUniqueProxies(proxies): - yield proxy + wb = WebRequest() + session = requests.session() + headers = wb.header + session.get(self.url, headers=headers, verify=False) # 必须先请求一起获取cookie + res = session.get(self.url, headers=headers, verify=False) + tree = etree.HTML(res.text) + for item in tree.xpath("//table[@class='table table-hover table-bordered']//tr"): + ip = "".join(item.xpath("./td[1]//text()")).strip() + port = "".join(item.xpath("./td[2]//text()")).strip() + if ip and port: + yield "%s:%s" % (ip, port) if __name__ == '__main__': diff --git a/tests/unit/test_fetcher_sources.py b/tests/unit/test_fetcher_sources.py index d074a7f5..abed28b2 100644 --- a/tests/unit/test_fetcher_sources.py +++ b/tests/unit/test_fetcher_sources.py @@ -314,30 +314,37 @@ def test_fetch_old_cross_day_returns_empty(self, mock_dt, mock_wr): class TestIhuanFetcher(object): - @patch("fetcher.sources.ihuan.WebRequest") - def test_fetch(self, mock_wr): + @patch("fetcher.sources.ihuan.requests") + def test_fetch(self, mock_requests): from fetcher.sources.ihuan import IhuanFetcher - ti_tree = etree.HTML( - '
') - post_tree = etree.HTML(_html_table([("1.2.3.4", "8080")])) - - ti_resp = _make_response(tree=ti_tree, text="") - post_resp = _make_response(tree=post_tree, text="1.2.3.4:8080") - - mock_instance = MagicMock() - mock_instance.get.return_value = ti_resp - mock_instance.post.return_value = post_resp - mock_wr.return_value = mock_instance + html = ( + '| 1.2.3.4 | 8080 |
| 5.6.7.8 | 3128 |