add custom word list input, add more telemetry, rewrite Levenshtein function
This commit is contained in:
parent
cfed832b8b
commit
71d5df7950
137
main.jl
137
main.jl
@ -6,9 +6,9 @@ mutable struct node
|
|||||||
colored::Bool
|
colored::Bool
|
||||||
end
|
end
|
||||||
|
|
||||||
function readInput(filter=(x->true))
|
function readInput(file="wordlist-german.txt";filter=(x->true))
|
||||||
words = Vector{String}()
|
words = Vector{String}()
|
||||||
for line in eachline("wordlist-german.txt")
|
for line in eachline(file)
|
||||||
if !filter(line)
|
if !filter(line)
|
||||||
continue
|
continue
|
||||||
end
|
end
|
||||||
@ -18,22 +18,112 @@ function readInput(filter=(x->true))
|
|||||||
end
|
end
|
||||||
|
|
||||||
"""
    createGraph(words::Vector{String})

Build the word graph: one `node` per entry of `words`, whose neighbour list holds
the indices (into `words`) of every other entry that appears among the candidates
produced by `niceLevenshteinNeighbours`.

Lookups go through `binarySearch`, which is used here as returning a
`(found, index)` pair. NOTE(review): this presumably requires `words` to be
sorted; confirm against `binarySearch` and the caller.

As telemetry, every word whose neighbour count equals the running maximum is
printed to stdout while the graph is built.

Returns the `Vector{node}` in the same order as `words`.
"""
function createGraph(words::Vector{String})
    maxNeighbours = 0
    graph = Vector{node}()
    for w in words
        _, myself = binarySearch(w, words)
        neighbours = Vector{Int}()
        for x in niceLevenshteinNeighbours(w)
            exists, index = binarySearch(x, words)
            if exists && index != myself
                push!(neighbours, index)
            end
        end
        # The candidate generator emits some strings several times (e.g. inserting
        # a letter next to an identical letter, or appending it), so the same index
        # can be collected repeatedly. Deduplicate so that each edge is stored once
        # and the neighbour-count telemetry below is not inflated.
        unique!(neighbours)

        maxNeighbours = max(maxNeighbours, length(neighbours))
        if length(neighbours) == maxNeighbours
            println(w)
        end
        n = node(w, neighbours, false)
        push!(graph, n)
    end
    return graph
end
|
||||||
|
|
||||||
|
"""
    countSubwords(words::Vector{String})

Count how often each ordered pair of adjacent letters (case-insensitive, `a`-`z`
only) occurs in `words`, and print summary telemetry to stdout.

The result is a 26-element vector of 26-element `Int` vectors; entry `[i][j]` is
the number of times letter `i` is immediately followed by letter `j`
(`a` = 1, ..., `z` = 26).

Telemetry printed along the way:
- every word containing two consecutive `y`s,
- every character whose code lies above `z` (for example umlauts),
- every `(i, j)` pair that never occurs, followed by the maximum, the smallest
  non-zero count (with their indices) and the number of pairs that never occur.

Pairs in which either character is not in `a`-`z` after lowercasing are not
counted. If no pair occurs at all, the reported minimum stays at `typemax(Int)`.
"""
function countSubwords(words::Vector{String})
    # subwords[i][j]: occurrences of letter i directly followed by letter j.
    subwords = [zeros(Int, 26) for _ in 1:26]

    for w in words
        word = collect(w)
        for i in 1:length(word)-1
            if lowercase(word[i]) == 'y' && lowercase(word[i+1]) == 'y'
                println(w)
            end

            # Map 'a'..'z' to 1..26 ('a' has code 97).
            a = Int(lowercase(word[i])) - 96
            b = Int(lowercase(word[i+1])) - 96
            if a > 26
                println(word[i])
            end
            if b > 26
                println(word[i+1])
            end

            # Anything outside a-z (umlauts, ß, hyphens, digits, ...) has no slot in
            # the 26x26 table; indexing with it would throw a BoundsError, so the
            # pair is skipped instead.
            if !(1 <= a <= 26 && 1 <= b <= 26)
                continue
            end
            subwords[a][b] += 1
        end
    end

    maxCount = 0
    maxI = 0
    maxJ = 0

    # typemax(Int) rather than a magic constant, so the minimum is correct no
    # matter how large the counts get.
    minCount = typemax(Int)
    minI = 0
    minJ = 0

    noOcc = 0

    for i in eachindex(subwords)
        for j in eachindex(subwords[i])
            count = subwords[i][j]
            if count > maxCount
                maxCount = count
                maxI = i
                maxJ = j
            end
            if count < minCount && count > 0
                minCount = count
                minI = i
                minJ = j
            end
            if count == 0
                noOcc += 1
                println(i, " and ", j)
            end
        end
    end

    println("Maximum: ", maxCount)
    println("MaximumI: ", maxI)
    println("MaximumJ: ", maxJ)

    println("Minimum: ", minCount)
    println("MinimumI: ", minI)
    println("MinimumJ: ", minJ)

    println(noOcc)
    return subwords
end
|
||||||
|
|
||||||
|
"""
    countNeighbours(w::String, words::Vector{String})

Print how many distinct entries of `words` (other than `w` itself) appear among
the candidates produced by `LevenshteinNeighbours(w)`, followed by those entries.

Membership is tested with `binarySearch`, used here as returning a
`(found, index)` pair. NOTE(review): this presumably requires `words` to be
sorted; confirm against `binarySearch`.

Returns `nothing`; the result is only written to stdout.
"""
function countNeighbours(w::String, words::Vector{String})
    neighbours = String[]
    for n in LevenshteinNeighbours(w)
        exists, _ = binarySearch(n, words)
        if exists && n != w
            push!(neighbours, n)
        end
    end
    # The candidate generator may yield the same string more than once; report
    # each neighbouring word a single time so the count is not inflated.
    unique!(neighbours)

    println("Number of neighbours: ", length(neighbours))
    println(neighbours)
    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
function binarySearch(word::String, words::Vector{String})
|
function binarySearch(word::String, words::Vector{String})
|
||||||
left = 1
|
left = 1
|
||||||
right = length(words)
|
right = length(words)
|
||||||
@ -74,6 +164,47 @@ function LevenshteinNeighbours(w::String)
|
|||||||
return neighbours
|
return neighbours
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
    niceLevenshteinNeighbours(w::String; lower='a':'z', upper='A':'Z')

Return all strings obtained from `w` by a single substitution, insertion or
deletion, restricted to "nicely" capitalised candidates:

- position 1 may be substituted by any letter of `upper` or `lower`; every other
  position only by letters of `lower`;
- a letter (from `upper` or `lower`) is inserted in front of the word only when
  the word does not start with a letter of `upper`; insertions before any later
  position and appended letters come from `lower` only;
- every single character may be deleted.

The result can contain `w` itself (substituting a letter by itself) and repeated
entries; callers are expected to filter those out.

# Keywords
- `lower`: letters usable at any position (default `'a':'z'`).
- `upper`: additional letters usable only as first character (default `'A':'Z'`).
  Pass extended collections (for example including umlauts) to cover alphabets
  beyond ASCII.
"""
function niceLevenshteinNeighbours(w::String; lower='a':'z', upper='A':'Z')
    # Work on a separate Char vector instead of rebinding `w`, so that `w` keeps
    # its declared type throughout the function.
    chars = collect(w)
    neighbours = Vector{String}()
    firstLetters = union(upper, lower)
    startsUpper = !isempty(chars) && chars[1] ∈ upper

    for i in eachindex(chars)
        alphabet = i == 1 ? firstLetters : lower
        for c in alphabet
            # Substitution at position i.
            substituted = copy(chars)
            substituted[i] = c
            push!(neighbours, String(substituted))

            # Nothing is inserted in front of a word that starts with an
            # upper-case letter.
            if i == 1 && startsUpper
                continue
            end
            # Insertion before position i.
            push!(neighbours, String(insert!(copy(chars), i, c)))
        end

        # Deletion of position i.
        push!(neighbours, String(deleteat!(copy(chars), i)))
    end

    # Insertion after the last character.
    for c in lower
        push!(neighbours, String(push!(copy(chars), c)))
    end
    return neighbours
end
|
||||||
|
|
||||||
function BFS(g::Vector{node})
|
function BFS(g::Vector{node})
|
||||||
graph = deepcopy(g)
|
graph = deepcopy(g)
|
||||||
components = 0
|
components = 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user