error safety
This commit is contained in:
parent
a59526bc55
commit
4b6abfd511
1 changed file with 4 additions and 1 deletion
5
main.py
5
main.py
|
|
@@ -22,6 +22,9 @@ def spider_rec(page_texts, prefix, domain, postfix, exclude):
|
||||||
soup = bs4.BeautifulSoup(html_page, "lxml")
|
soup = bs4.BeautifulSoup(html_page, "lxml")
|
||||||
|
|
||||||
page_texts[postfix] = [soup.getText(), soup.find_all('html')[0].get("lang")]
|
page_texts[postfix] = [soup.getText(), soup.find_all('html')[0].get("lang")]
|
||||||
|
if page_texts[postfix][1] is None:
|
||||||
|
page_texts[postfix][1] = 'en-us'
|
||||||
|
|
||||||
for link in soup.findAll('a'):
|
for link in soup.findAll('a'):
|
||||||
href = link.get('href')
|
href = link.get('href')
|
||||||
if "mailto:" not in href and (domain in href or href[0] == '/'):
|
if "mailto:" not in href and (domain in href or href[0] == '/'):
|
||||||
|
|
@@ -91,7 +94,7 @@ def main(report: bool):
|
||||||
tools = dict()
|
tools = dict()
|
||||||
langs = []
|
langs = []
|
||||||
for l in links.keys():
|
for l in links.keys():
|
||||||
if links[l][1] not in langs:
|
if links[l][1] not in langs and links[l][1] is not None:
|
||||||
langs.append(links[l][1])
|
langs.append(links[l][1])
|
||||||
|
|
||||||
for lang in langs:
|
for lang in langs:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue