From 56a5c8a6a2085bd3b00f3f9816794d400e474562 Mon Sep 17 00:00:00 2001 From: bMorgan01 Date: Tue, 29 Mar 2022 20:05:35 -0600 Subject: [PATCH] debug messages --- crawl.conf | 2 +- main.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/crawl.conf b/crawl.conf index ba6dcb5..3524db4 100644 --- a/crawl.conf +++ b/crawl.conf @@ -3,7 +3,7 @@ benrmorgan.com # Prefix Ex: http://www. http://www. # Target path Ex /var/www/html/sitemap.xml or ./sitemaps/sitemap.xml -/var/www/html/sitemap.xml +sitemap.xml # Ignore urls containing Ex: /files/ /files/ /images/ \ No newline at end of file diff --git a/main.py b/main.py index a3d1076..73b9bd6 100644 --- a/main.py +++ b/main.py @@ -60,6 +60,8 @@ def cmp(p1, p2): def main(): + print("Reading conf...") + conf = [] with open('crawl.conf', 'r') as file: for line in file.readlines(): @@ -74,14 +76,17 @@ def main(): ignores = conf[3::] + print("Crawling site...") links = spider(prefix, domain, ignores) date = datetime.datetime.utcnow() existed = exists(path) oldpath = path if existed: + print("Sitemap already exists, creating temp...") path = "newmap.xml" + print("Writing to target file...") out = open(path, 'w') out.write("