crawl.conf ini
This commit is contained in:
parent
0415bbbbf8
commit
7462511e2d
2 changed files with 15 additions and 36 deletions
33
crawl.conf
33
crawl.conf
|
|
@ -1,26 +1,7 @@
|
||||||
# Target
|
[Config]
|
||||||
http://www.benrmorgan.com
|
; Target site
|
||||||
# Ignore urls containing Ex: /files/
|
site = http://www.benrmorgan.com
|
||||||
/files/
|
; Ignore URLs containing any of these substrings (separate entries with ", "). Ex: /files/, /images/
|
||||||
/images/
|
ignore = /files/, /images/
|
||||||
# Custom Dictionary Ex: Strato
|
# Custom dictionary: terms excluded from reported matches (separate entries with ", "). Ex: Strato, EMWAVE
|
||||||
Strato
|
dictionary = Strato, Rainmeter, WebinarsGUI, LON-CAPA, EMWAVE, FACQUAD, dx, dy, PATHAG, PNTSLOPE, PERPLINE, QUADRATI, kerf, toolset, cron, GitHub users, RogerHub, mml, recurse, Tac
|
||||||
Rainmeter
|
|
||||||
WebinarsGUI
|
|
||||||
LON-CAPA
|
|
||||||
EMWAVE
|
|
||||||
FACQUAD
|
|
||||||
dx
|
|
||||||
dy
|
|
||||||
PATHAG
|
|
||||||
PNTSLOPE
|
|
||||||
PERPLINE
|
|
||||||
QUADRATI
|
|
||||||
kerf
|
|
||||||
toolset
|
|
||||||
cron
|
|
||||||
GitHub users
|
|
||||||
RogerHub
|
|
||||||
mml
|
|
||||||
recurse
|
|
||||||
Tac
|
|
||||||
|
|
|
||||||
18
main.py
18
main.py
|
|
@ -1,3 +1,4 @@
|
||||||
|
import configparser
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from stat import S_ISFIFO
|
from stat import S_ISFIFO
|
||||||
|
|
@ -87,16 +88,13 @@ def main(report: bool):
|
||||||
if not report:
|
if not report:
|
||||||
print("Reading conf...")
|
print("Reading conf...")
|
||||||
|
|
||||||
conf = []
|
config = configparser.ConfigParser()
|
||||||
with open('crawl.conf', 'r') as file:
|
config.read('crawl.conf')
|
||||||
for line in file.readlines():
|
config = config['Config']
|
||||||
line = line.replace("\n", "")
|
|
||||||
line = line.replace("\r", "")
|
|
||||||
conf.append(line)
|
|
||||||
|
|
||||||
target = conf[1]
|
target = config['site']
|
||||||
ignores = conf[3:conf.index("# Custom Dictionary Ex: Strato")]
|
ignores = config['ignore'].split(', ')
|
||||||
custDict = conf[conf.index("# Custom Dictionary Ex: Strato") + 1::]
|
cust_dict = config['dictionary'].split(', ')
|
||||||
|
|
||||||
if not report:
|
if not report:
|
||||||
print("Crawling site...")
|
print("Crawling site...")
|
||||||
|
|
@ -130,7 +128,7 @@ def main(report: bool):
|
||||||
matches = tools[links[l][1]].check(text)
|
matches = tools[links[l][1]].check(text)
|
||||||
all_matches += len(matches)
|
all_matches += len(matches)
|
||||||
matches = [match for match in matches if
|
matches = [match for match in matches if
|
||||||
match.context[match.offsetInContext:match.offsetInContext + match.errorLength] not in custDict]
|
match.context[match.offsetInContext:match.offsetInContext + match.errorLength] not in cust_dict]
|
||||||
all_filtered_matches += len(matches)
|
all_filtered_matches += len(matches)
|
||||||
|
|
||||||
if len(matches) > 0:
|
if len(matches) > 0:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue