246 lines
5.8 KiB
Julia
246 lines
5.8 KiB
Julia
using DataStructures
|
|
|
|
mutable struct node
|
|
word::String
|
|
neighbours::Vector{Int}
|
|
colored::Bool
|
|
end
|
|
|
|
function readInput(file="wordlist-german.txt";filter=(x->true))
|
|
words = Vector{String}()
|
|
for line in eachline(file)
|
|
if !filter(line)
|
|
continue
|
|
end
|
|
push!(words, line)
|
|
end
|
|
return sort(words)
|
|
end
|
|
|
|
function createGraph(words::Vector{String})
|
|
maxNeighbours = 0
|
|
graph = Vector{node}()
|
|
for w in words
|
|
_, myself = binarySearch(w, words)
|
|
neighbours = Vector{Int}()
|
|
for x in niceLevenshteinNeighbours(w)
|
|
exists, index = binarySearch(x, words)
|
|
if exists && index != myself
|
|
push!(neighbours, index)
|
|
end
|
|
end
|
|
maxNeighbours = max(maxNeighbours,length(neighbours))
|
|
if length(neighbours) == maxNeighbours
|
|
println(w)
|
|
end
|
|
n = node(w, neighbours, false)
|
|
push!(graph, n)
|
|
end
|
|
return graph
|
|
end
|
|
|
|
function countSubwords(words::Vector{String})
|
|
subwords = Vector{Vector{Int}}()
|
|
for i in 1:26
|
|
push!(subwords,Vector{Int}())
|
|
for j in 1:26
|
|
push!(subwords[i],0)
|
|
end
|
|
end
|
|
|
|
for w in words
|
|
word = collect(w)
|
|
for i in 1:length(w)-1
|
|
|
|
if lowercase(word[i]) == 'y' && lowercase(word[i+1]) == 'y'
|
|
println(w)
|
|
end
|
|
|
|
first = Int(lowercase(word[i])) - 96
|
|
second = Int(lowercase(word[i+1])) - 96
|
|
if first > 26
|
|
println(word[i])
|
|
end
|
|
if second > 26
|
|
println(word[i+1])
|
|
end
|
|
subwords[first][second] += 1
|
|
end
|
|
end
|
|
|
|
max = 0
|
|
maxI = 0
|
|
maxJ = 0
|
|
|
|
min = 1000000
|
|
minI = 0
|
|
minJ = 0
|
|
|
|
noOcc = 0
|
|
|
|
for i in eachindex(subwords)
|
|
for j in eachindex(subwords[i])
|
|
if subwords[i][j] > max
|
|
max = subwords[i][j]
|
|
maxI = i
|
|
maxJ = j
|
|
end
|
|
if subwords[i][j] < min && subwords[i][j] > 0
|
|
min = subwords[i][j]
|
|
minI = i
|
|
minJ = j
|
|
end
|
|
if subwords[i][j] == 0
|
|
noOcc += 1
|
|
println(i, " and ", j)
|
|
end
|
|
end
|
|
end
|
|
|
|
println("Maximum: ", max)
|
|
println("MaximumI: ", maxI)
|
|
println("MaximumJ: ", maxJ)
|
|
|
|
println("Minimum: ", min)
|
|
println("MinimumI: ", minI)
|
|
println("MinimumJ: ", minJ)
|
|
|
|
println(noOcc)
|
|
return subwords
|
|
end
|
|
|
|
function countNeighbours(w::String,words::Vector{String})
|
|
nr = 0
|
|
neighbours = []
|
|
for n in LevenshteinNeighbours(w)
|
|
exists, _ = binarySearch(n,words)
|
|
if exists && n != w
|
|
nr += 1
|
|
push!(neighbours,n)
|
|
end
|
|
end
|
|
println("Number of neighbours: ", nr)
|
|
println(neighbours)
|
|
end
|
|
|
|
|
|
function binarySearch(word::String, words::Vector{String})
|
|
left = 1
|
|
right = length(words)
|
|
while left < right
|
|
middle = floor(Int, (right + left) / 2)
|
|
if words[middle] < word
|
|
left = middle + 1
|
|
elseif words[middle] > word
|
|
right = middle - 1
|
|
else
|
|
return true, middle
|
|
end
|
|
end
|
|
return false, 0
|
|
end
|
|
|
|
function LevenshteinNeighbours(w::String)
|
|
neighbours = Vector{String}()
|
|
w = collect(w)
|
|
for i in eachindex(w)
|
|
for c in union('a':'z','A':'Z')
|
|
word = copy(w)
|
|
word[i] = c
|
|
push!(neighbours, String(word))
|
|
|
|
word = insert!(copy(w),i,c)
|
|
push!(neighbours, String(word))
|
|
end
|
|
word = deleteat!(copy(w), i)
|
|
push!(neighbours, String(word))
|
|
end
|
|
|
|
for c in union('a':'z','A':'Z')
|
|
word = copy(w)
|
|
push!(word, c)
|
|
push!(neighbours, String(word))
|
|
end
|
|
return neighbours
|
|
end
|
|
|
|
function niceLevenshteinNeighbours(w::String)
|
|
neighbours = Vector{String}()
|
|
w = collect(w)
|
|
for i in eachindex(w)
|
|
|
|
if i == 1
|
|
for c in union('A':'Z','a':'z')
|
|
word = copy(w)
|
|
word[i] = c
|
|
push!(neighbours, String(word))
|
|
|
|
if w[1] ∈ 'A':'Z'
|
|
continue
|
|
end
|
|
word = insert!(copy(w),i,c)
|
|
push!(neighbours, String(word))
|
|
end
|
|
|
|
else
|
|
for c in 'a':'z'
|
|
word = copy(w)
|
|
word[i] = c
|
|
push!(neighbours, String(word))
|
|
|
|
word = insert!(copy(w),i,c)
|
|
push!(neighbours, String(word))
|
|
end
|
|
end
|
|
|
|
word = deleteat!(copy(w), i)
|
|
push!(neighbours, String(word))
|
|
end
|
|
|
|
for c in 'a':'z'
|
|
word = copy(w)
|
|
push!(word, c)
|
|
push!(neighbours, String(word))
|
|
end
|
|
return neighbours
|
|
end
|
|
|
|
function BFS(g::Vector{node})
|
|
graph = deepcopy(g)
|
|
components = 0
|
|
biggestComponent = 0
|
|
|
|
q = Queue{node}()
|
|
|
|
for i in eachindex(graph)
|
|
if graph[i].colored == false
|
|
|
|
components += 1
|
|
currComponent = 1
|
|
enqueue!(q, graph[i])
|
|
graph[i].colored = true
|
|
|
|
while !isempty(q)
|
|
n = dequeue!(q)
|
|
for m in n.neighbours
|
|
if graph[m].colored == false
|
|
currComponent += 1
|
|
enqueue!(q, graph[m])
|
|
graph[m].colored = true
|
|
end
|
|
end
|
|
end
|
|
|
|
biggestComponent = max(biggestComponent, currComponent)
|
|
if biggestComponent == currComponent
|
|
println("Wort in der größten Komponente: ", graph[i].word)
|
|
end
|
|
end
|
|
end
|
|
|
|
println("Anzahl Zusammenhangskomponenten: ", components)
|
|
println("Größe der größten Zusammenhangskomponente: ", biggestComponent)
|
|
end
|
|
|
|
|