From 71d5df79500abbd34933dd2a987aa9da1e48386b Mon Sep 17 00:00:00 2001 From: Jonas Seiler Date: Wed, 4 Dec 2024 11:11:28 +0100 Subject: [PATCH] add custom word list input, add more telemetry, rewrite Levenshtein function --- main.jl | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/main.jl b/main.jl index 38d6cb0..091ac07 100644 --- a/main.jl +++ b/main.jl @@ -6,9 +6,9 @@ mutable struct node colored::Bool end -function readInput(filter=(x->true)) +function readInput(file="wordlist-german.txt";filter=(x->true)) words = Vector{String}() - for line in eachline("wordlist-german.txt") + for line in eachline(file) if !filter(line) continue end @@ -18,22 +18,112 @@ function readInput(filter=(x->true)) end function createGraph(words::Vector{String}) + maxNeighbours = 0 graph = Vector{node}() for w in words _, myself = binarySearch(w, words) neighbours = Vector{Int}() - for x in LevenshteinNeighbours(w) + for x in niceLevenshteinNeighbours(w) exists, index = binarySearch(x, words) if exists && index != myself push!(neighbours, index) end end + maxNeighbours = max(maxNeighbours,length(neighbours)) + if length(neighbours) == maxNeighbours + println(w) + end n = node(w, neighbours, false) push!(graph, n) end return graph end +function countSubwords(words::Vector{String}) + subwords = Vector{Vector{Int}}() + for i in 1:26 + push!(subwords,Vector{Int}()) + for j in 1:26 + push!(subwords[i],0) + end + end + + for w in words + word = collect(w) + for i in 1:length(w)-1 + + if lowercase(word[i]) == 'y' && lowercase(word[i+1]) == 'y' + println(w) + end + + first = Int(lowercase(word[i])) - 96 + second = Int(lowercase(word[i+1])) - 96 + if first > 26 + println(word[i]) + end + if second > 26 + println(word[i+1]) + end + subwords[first][second] += 1 + end + end + + max = 0 + maxI = 0 + maxJ = 0 + + min = 1000000 + minI = 0 + minJ = 0 + + noOcc = 0 + + for i in eachindex(subwords) + for j in eachindex(subwords[i]) + if subwords[i][j] > max + max = subwords[i][j] + maxI = i + maxJ = j + end + if subwords[i][j] < min && subwords[i][j] > 0 + min = subwords[i][j] + minI = i + minJ = j + end + if subwords[i][j] == 0 + noOcc += 1 + println(i, " and ", j) + end + end + end + + println("Maximum: ", max) + println("MaximumI: ", maxI) + println("MaximumJ: ", maxJ) + + println("Minimum: ", min) + println("MinimumI: ", minI) + println("MinimumJ: ", minJ) + + println(noOcc) + return subwords +end + +function countNeighbours(w::String,words::Vector{String}) + nr = 0 + neighbours = [] + for n in LevenshteinNeighbours(w) + exists, _ = binarySearch(n,words) + if exists && n != w + nr += 1 + push!(neighbours,n) + end + end + println("Number of neighbours: ", nr) + println(neighbours) +end + + function binarySearch(word::String, words::Vector{String}) left = 1 right = length(words) @@ -74,6 +164,47 @@ function LevenshteinNeighbours(w::String) return neighbours end +function niceLevenshteinNeighbours(w::String) + neighbours = Vector{String}() + w = collect(w) + for i in eachindex(w) + + if i == 1 + for c in union('A':'Z','a':'z') + word = copy(w) + word[i] = c + push!(neighbours, String(word)) + + if w[1] ∈ 'A':'Z' + continue + end + word = insert!(copy(w),i,c) + push!(neighbours, String(word)) + end + + else + for c in 'a':'z' + word = copy(w) + word[i] = c + push!(neighbours, String(word)) + + word = insert!(copy(w),i,c) + push!(neighbours, String(word)) + end + end + + word = deleteat!(copy(w), i) + push!(neighbours, String(word)) + end + + for c in 'a':'z' + word = copy(w) + push!(word, c) + push!(neighbours, String(word)) + end + return neighbours +end + function BFS(g::Vector{node}) graph = deepcopy(g) components = 0