limit "broken codes"
This commit is contained in:
parent
72a195a0e7
commit
75d88ae1b1
1 changed files with 5 additions and 4 deletions
9
main.py
9
main.py
|
|
@@ -64,10 +64,11 @@ def spider_rec(page_links, current_href, base_parse, exclude):
|
||||||
|
|
||||||
spider_rec(page_links, href, base_parse, exclude)
|
spider_rec(page_links, href, base_parse, exclude)
|
||||||
except HTTPError as e:
|
except HTTPError as e:
|
||||||
if parse_result.hostname == base_parse.hostname:
|
if e.code == 400 or e.code in range(404, 500):
|
||||||
page_links[postfix] = e
|
if parse_result.hostname == base_parse.hostname:
|
||||||
else:
|
page_links[postfix] = e
|
||||||
page_links[current_href] = e
|
else:
|
||||||
|
page_links[current_href] = e
|
||||||
|
|
||||||
return page_links
|
return page_links
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue