prettify
This commit is contained in:
parent
5ad1802e17
commit
75c911f6d2
1 changed file with 28 additions and 3 deletions
31
main.py
31
main.py
|
|
@@ -1,3 +1,4 @@
|
|||
import http
|
||||
import bs4
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
|
|
@@ -10,8 +11,10 @@ def spider_rec(page_links, prefix, domain, postfix, exclude):
|
|||
req = Request(prefix + domain + postfix)
|
||||
html_page = urlopen(req)
|
||||
|
||||
print(html_page.status)
|
||||
page_links[postfix] = []
|
||||
if int(html_page.status) >= 400:
|
||||
page_links[postfix] = html_page
|
||||
else:
|
||||
page_links[postfix] = []
|
||||
|
||||
soup = bs4.BeautifulSoup(html_page, "lxml")
|
||||
|
||||
|
|
@@ -54,7 +57,29 @@ def main():
|
|||
ignores = conf[5:]
|
||||
|
||||
print("Crawling site...")
|
||||
links = spider(prefix, domain, ignores)
|
||||
pages = spider(prefix, domain, ignores)
|
||||
|
||||
count = 0
|
||||
for link in pages.keys():
|
||||
if type(pages[link]) == http.client.HTTPResponse:
|
||||
count += 1
|
||||
|
||||
found = []
|
||||
for search_link in pages.keys():
|
||||
if type(pages[link]) != http.client.HTTPResponse:
|
||||
for href in pages[link]:
|
||||
if href == link:
|
||||
found.append(href)
|
||||
|
||||
print(''.join(['='] * 100))
|
||||
print(link, pages[link].status, pages[link].reason)
|
||||
print(''.join(['-'] * 100))
|
||||
print("Found in:")
|
||||
|
||||
for href in found:
|
||||
print(href)
|
||||
|
||||
print("Done.")
|
||||
|
||||
|
||||
main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue