From 4b6abfd5110f374de13e5c7d50754b89d23f8b51 Mon Sep 17 00:00:00 2001 From: bMorgan01 Date: Sun, 25 Sep 2022 10:02:27 -0600 Subject: [PATCH] error safety --- main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index a287f6a..13c6fc1 100755 --- a/main.py +++ b/main.py @@ -22,6 +22,9 @@ def spider_rec(page_texts, prefix, domain, postfix, exclude): soup = bs4.BeautifulSoup(html_page, "lxml") page_texts[postfix] = [soup.getText(), soup.find_all('html')[0].get("lang")] + if page_texts[postfix][1] is None: + page_texts[postfix][1] = 'en-us' + for link in soup.findAll('a'): href = link.get('href') if "mailto:" not in href and (domain in href or href[0] == '/'): @@ -91,7 +94,7 @@ def main(report: bool): tools = dict() langs = [] for l in links.keys(): - if links[l][1] not in langs: + if links[l][1] not in langs and links[l][1] is not None: langs.append(links[l][1]) for lang in langs: