add custom word list input, add more telemetry, rewrite Levenshtein function

This commit is contained in:
Jonas Seiler 2024-12-04 11:11:28 +01:00
parent cfed832b8b
commit 71d5df7950

137
main.jl
View File

@ -6,9 +6,9 @@ mutable struct node
colored::Bool
end
function readInput(filter=(x->true))
function readInput(file="wordlist-german.txt";filter=(x->true))
words = Vector{String}()
for line in eachline("wordlist-german.txt")
for line in eachline(file)
if !filter(line)
continue
end
@ -18,22 +18,112 @@ function readInput(filter=(x->true))
end
"""
    createGraph(words::Vector{String})

Build the word graph for `words` and return it as a `Vector{node}`.

One `node` is created per entry of `words`, in the same order. Its neighbour list
holds the indices (as reported by `binarySearch`) of every candidate produced by
`niceLevenshteinNeighbours` that is found in `words` and is not the word itself.
Candidates are not deduplicated, so an index can appear more than once.

As telemetry, every word whose neighbour count reaches the running maximum is
printed to stdout.

NOTE(review): `binarySearch` presumably requires `words` to be sorted - confirm
against its definition.
"""
function createGraph(words::Vector{String})
    maxNeighbours = 0
    graph = Vector{node}()
    for w in words
        # Index of the word itself, used to keep self-loops out of the graph.
        _, myself = binarySearch(w, words)
        neighbours = Vector{Int}()
        # Only the rewritten generator is iterated; the stale second loop header
        # over `LevenshteinNeighbours` left the function without a matching `end`.
        for x in niceLevenshteinNeighbours(w)
            exists, index = binarySearch(x, words)
            if exists && index != myself
                push!(neighbours, index)
            end
        end
        maxNeighbours = max(maxNeighbours, length(neighbours))
        # Telemetry: report each word that ties or sets the current record.
        if length(neighbours) == maxNeighbours
            println(w)
        end
        push!(graph, node(w, neighbours, false))
    end
    return graph
end
"""
    countSubwords(words::Vector{String})

Count how often each ordered pair of adjacent letters occurs in `words`.

Returns a vector of 26 vectors of 26 `Int`s, `counts`, where `counts[i][j]` is the
number of times letter `i` is directly followed by letter `j`. Letters are compared
case-insensitively and numbered `a` = 1 through `z` = 26.

A pair containing a character outside `a`-`z` (umlauts, hyphens, digits, ...) cannot
be mapped to a table cell; the offending character is printed and the pair is
skipped instead of indexing out of bounds.

Telemetry printed to stdout: every word containing "yy", every letter pair that
never occurs (as `i and j`), the largest and the smallest non-zero count together
with their indices, and the number of pairs that never occur.
"""
function countSubwords(words::Vector{String})
    subwords = [zeros(Int, 26) for _ in 1:26]
    for w in words
        word = collect(w)
        for i in 1:length(word)-1
            a = lowercase(word[i])
            b = lowercase(word[i+1])
            if a == 'y' && b == 'y'
                println(w)
            end
            # 'a' has code point 97, so subtracting 96 maps a-z onto 1-26.
            row = Int(a) - 96
            col = Int(b) - 96
            rowvalid = 1 <= row <= 26
            colvalid = 1 <= col <= 26
            if !rowvalid
                println(word[i])
            end
            if !colvalid
                println(word[i+1])
            end
            # Skip pairs that do not fit the 26x26 table rather than crashing.
            if !(rowvalid && colvalid)
                continue
            end
            subwords[row][col] += 1
        end
    end
    # Locals are deliberately not called `max`/`min` so Base functions stay usable.
    largest = 0
    largestI = 0
    largestJ = 0
    smallest = 1000000  # sentinel; reported unchanged when no pair occurs at all
    smallestI = 0
    smallestJ = 0
    noOcc = 0
    for i in eachindex(subwords)
        for j in eachindex(subwords[i])
            count = subwords[i][j]
            if count > largest
                largest = count
                largestI = i
                largestJ = j
            end
            if count < smallest && count > 0
                smallest = count
                smallestI = i
                smallestJ = j
            end
            if count == 0
                noOcc += 1
                println(i, " and ", j)
            end
        end
    end
    println("Maximum: ", largest)
    println("MaximumI: ", largestI)
    println("MaximumJ: ", largestJ)
    println("Minimum: ", smallest)
    println("MinimumI: ", smallestI)
    println("MinimumJ: ", smallestJ)
    println(noOcc)
    return subwords
end
"""
    countNeighbours(w::String, words::Vector{String})

Print how many candidates from `LevenshteinNeighbours(w)` are found in `words` by
`binarySearch`, followed by the matching words themselves. The word `w` itself is
never counted. Candidates are not deduplicated, so a word generated more than once
is counted more than once.

Returns `nothing`; the function exists purely for its diagnostic output.
"""
function countNeighbours(w::String, words::Vector{String})
    # Typed container: avoids a Vector{Any} and the `Any[...]` prefix in the output.
    neighbours = String[]
    for n in LevenshteinNeighbours(w)
        exists, _ = binarySearch(n, words)
        if exists && n != w
            push!(neighbours, n)
        end
    end
    println("Number of neighbours: ", length(neighbours))
    println(neighbours)
    return nothing
end
function binarySearch(word::String, words::Vector{String})
left = 1
right = length(words)
@ -74,6 +164,47 @@ function LevenshteinNeighbours(w::String)
return neighbours
end
"""
    niceLevenshteinNeighbours(w::String)

Return candidate strings at edit distance at most one from `w`, restricted to
plausible capitalisation.

For every position the character is substituted, a character is inserted in front
of it, and the character is deleted; finally a lowercase letter is appended.
Position 1 uses the alphabet `A`-`Z` plus `a`-`z`, every other position only
`a`-`z`. When `w` starts with an uppercase letter nothing is inserted in front of
it.

The result may contain `w` itself (substituting a letter by itself) and duplicate
entries; callers are expected to filter those.
"""
function niceLevenshteinNeighbours(w::String)
    neighbours = Vector{String}()
    # Work on a separate Char vector instead of rebinding the String argument.
    chars = collect(w)
    firstalphabet = union('A':'Z', 'a':'z')
    for i in eachindex(chars)
        if i == 1
            startsupper = chars[1] in 'A':'Z'
            for c in firstalphabet
                word = copy(chars)
                word[i] = c
                push!(neighbours, String(word))
                # No insertion in front of an uppercase first letter.
                if startsupper
                    continue
                end
                word = insert!(copy(chars), i, c)
                push!(neighbours, String(word))
            end
        else
            for c in 'a':'z'
                word = copy(chars)
                word[i] = c
                push!(neighbours, String(word))
                word = insert!(copy(chars), i, c)
                push!(neighbours, String(word))
            end
        end
        word = deleteat!(copy(chars), i)
        push!(neighbours, String(word))
    end
    # Insertions after the last character.
    for c in 'a':'z'
        word = copy(chars)
        push!(word, c)
        push!(neighbours, String(word))
    end
    return neighbours
end
function BFS(g::Vector{node})
graph = deepcopy(g)
components = 0