From 7462511e2db03a2abd7f3414be571e421a70b494 Mon Sep 17 00:00:00 2001 From: bMorgan01 Date: Sun, 25 Sep 2022 11:21:04 -0600 Subject: [PATCH] crawl.conf ini --- crawl.conf | 33 +++++++-------------------------- main.py | 18 ++++++++---------- 2 files changed, 15 insertions(+), 36 deletions(-) diff --git a/crawl.conf b/crawl.conf index 0d3b639..95a101f 100755 --- a/crawl.conf +++ b/crawl.conf @@ -1,26 +1,7 @@ -# Target -http://www.benrmorgan.com -# Ignore urls containing Ex: /files/ -/files/ -/images/ -# Custom Dictionary Ex: Strato -Strato -Rainmeter -WebinarsGUI -LON-CAPA -EMWAVE -FACQUAD -dx -dy -PATHAG -PNTSLOPE -PERPLINE -QUADRATI -kerf -toolset -cron -GitHub users -RogerHub -mml -recurse -Tac +[Config] +; Target site +site = http://www.benrmorgan.com +; Ignore urls containing Ex: /files/ +ignore = /files/, /images/ +# Custom Dictionary Ex: Strato, EMWAVE +dictionary = Strato, Rainmeter, WebinarsGUI, LON-CAPA, EMWAVE, FACQUAD, dx, dy, PATHAG, PNTSLOPE, PERPLINE, QUADRATI, kerf, toolset, cron, GitHub users, RogerHub, mml, recurse, Tac diff --git a/main.py b/main.py index 830d34a..445a7b6 100755 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +import configparser import os import re from stat import S_ISFIFO @@ -87,16 +88,13 @@ def main(report: bool): if not report: print("Reading conf...") - conf = [] - with open('crawl.conf', 'r') as file: - for line in file.readlines(): - line = line.replace("\n", "") - line = line.replace("\r", "") - conf.append(line) + config = configparser.ConfigParser() + config.read('crawl.conf') + config = config['Config'] - target = conf[1] - ignores = conf[3:conf.index("# Custom Dictionary Ex: Strato")] - custDict = conf[conf.index("# Custom Dictionary Ex: Strato") + 1::] + target = config['site'] + ignores = config['ignore'].split(', ') + cust_dict = config['dictionary'].split(', ') if not report: print("Crawling site...") @@ -130,7 +128,7 @@ def main(report: bool): matches = tools[links[l][1]].check(text) all_matches += len(matches) matches = [match for match in matches if - match.context[match.offsetInContext:match.offsetInContext + match.errorLength] not in custDict] + match.context[match.offsetInContext:match.offsetInContext + match.errorLength] not in cust_dict] all_filtered_matches += len(matches) if len(matches) > 0: