Add debug progress messages to main.py

This commit is contained in:
bMorgan01 2022-03-29 20:05:35 -06:00
parent 7d8e7b9065
commit 56a5c8a6a2
2 changed files with 11 additions and 2 deletions

View file

@@ -3,7 +3,7 @@ benrmorgan.com
# Prefix Ex: http://www. # Prefix Ex: http://www.
http://www. http://www.
# Target path Ex /var/www/html/sitemap.xml or ./sitemaps/sitemap.xml # Target path Ex /var/www/html/sitemap.xml or ./sitemaps/sitemap.xml
/var/www/html/sitemap.xml sitemap.xml
# Ignore urls containing Ex: /files/ # Ignore urls containing Ex: /files/
/files/ /files/
/images/ /images/

11
main.py
View file

@@ -60,6 +60,8 @@ def cmp(p1, p2):
def main(): def main():
print("Reading conf...")
conf = [] conf = []
with open('crawl.conf', 'r') as file: with open('crawl.conf', 'r') as file:
for line in file.readlines(): for line in file.readlines():
@@ -74,14 +76,17 @@ def main():
ignores = conf[3::] ignores = conf[3::]
print("Crawling site...")
links = spider(prefix, domain, ignores) links = spider(prefix, domain, ignores)
date = datetime.datetime.utcnow() date = datetime.datetime.utcnow()
existed = exists(path) existed = exists(path)
oldpath = path oldpath = path
if existed: if existed:
print("Sitemap already exists, creating temp...")
path = "newmap.xml" path = "newmap.xml"
print("Writing to target file...")
out = open(path, 'w') out = open(path, 'w')
out.write("<!--\n") out.write("<!--\n")
out.write("\tSitemap generator by Ben Morgan - www.benrmorgan.com\n") out.write("\tSitemap generator by Ben Morgan - www.benrmorgan.com\n")
@@ -115,10 +120,14 @@ def main():
out.close() out.close()
if existed and not cmp(oldpath, path): if existed and not cmp(oldpath, path):
print("Creating old sitemap backup...")
move(oldpath, oldpath + "-old") move(oldpath, oldpath + "-old")
print("Overwriting old sitemap with new one...")
move(path, oldpath) move(path, oldpath)
else: elif existed:
print("Sitemaps are the same, removing temp...")
os.remove(path) os.remove(path)
print("Done.")
main() main()