diff --git a/crawl.conf b/crawl.conf index ba6dcb5..3524db4 100644 --- a/crawl.conf +++ b/crawl.conf @@ -3,7 +3,7 @@ benrmorgan.com # Prefix Ex: http://www. http://www. # Target path Ex /var/www/html/sitemap.xml or ./sitemaps/sitemap.xml -/var/www/html/sitemap.xml +sitemap.xml # Ignore urls containing Ex: /files/ /files/ /images/ \ No newline at end of file diff --git a/main.py b/main.py index a3d1076..73b9bd6 100644 --- a/main.py +++ b/main.py @@ -60,6 +60,8 @@ def cmp(p1, p2): def main(): + print("Reading conf...") + conf = [] with open('crawl.conf', 'r') as file: for line in file.readlines(): @@ -74,14 +76,17 @@ def main(): ignores = conf[3::] + print("Crawling site...") links = spider(prefix, domain, ignores) date = datetime.datetime.utcnow() existed = exists(path) oldpath = path if existed: + print("Sitemap already exists, creating temp...") path = "newmap.xml" + print("Writing to target file...") out = open(path, 'w') out.write("