From 75d88ae1b186580a98a2e0d2b5352559c02a4ddb Mon Sep 17 00:00:00 2001 From: bMorgan01 Date: Sun, 25 Sep 2022 12:09:04 -0600 Subject: [PATCH] limit "broken codes" --- main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 22ba6b7..88f9eb7 100644 --- a/main.py +++ b/main.py @@ -64,10 +64,11 @@ def spider_rec(page_links, current_href, base_parse, exclude): spider_rec(page_links, href, base_parse, exclude) except HTTPError as e: - if parse_result.hostname == base_parse.hostname: - page_links[postfix] = e - else: - page_links[current_href] = e + if e.code == 400 or e.code in range(404, 500): + if parse_result.hostname == base_parse.hostname: + page_links[postfix] = e + else: + page_links[current_href] = e return page_links