-
Notifications
You must be signed in to change notification settings - Fork 0
/
WordleSim.jl
268 lines (250 loc) · 11.2 KB
/
WordleSim.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
module WordleSim
#An interactive Wordle solver and collection of related functions
#We have a set of valid *attempts* that can be tested against a hidden *answer* to generate an output *pattern* that provides clues to the answer
export startSolver, scoreAttempts, producePattern, patternTest, makeAttempts, tryAnswer, answers, allowed
include("util.jl")
using DelimitedFiles
using InlineStrings
const baseDir = @__DIR__
const answerFile = "wordle-answers-alphabetical.txt"
const allowedFile = "allowed.txt" #Disjoint set with answers - needs to be combined for full set of valid attempts
const freqCount = Dict{Char, Float64}() #Tracks occurrences of a letter, reuse to avoid constant reallocation... Float to track negative/positive zero
answers = Set(readdlm(joinpath(baseDir, answerFile), String7))
allowed = Set(union(answers, readdlm(joinpath(baseDir, allowedFile), String7)))
#Returns average uncertainty remaining by each possible attempt against the set of answers
function makeAttempts(attempts::Set{T}, answers::Set{T}, depth::Int) where T<:AbstractString
if length(answers) == 0 || length(attempts) == 0 || depth > 6
return Dict{T, Float64}("" => 0)
end
attemptRecords = Dict{T, Float64}()
sizehint!(attemptRecords, length(attempts))
@inbounds for attempt in attempts
attemptResults::Float64 = tryAnswer(attempt, answers, depth)
push!(attemptRecords, attempt => attemptResults)
end
return attemptRecords
end
#Returns average information gained/uncertainty reduced in all possible universes by our attempt against set of answers
function tryAnswer(attempt::T, answers, depth::Int) where T<:AbstractString
if depth > 6
return 0.0
end
n = 0.0
startEntropy = log(2, length(answers))
cumInfogain = 0.0
@inbounds for pattern in unique(collect(producePattern(attempt, ans) for ans in answers))
numAnswers = length(filterAnswers(attempt, answers, pattern, freqCount))
if numAnswers == length(answers) #Nothing filtered out = no info gained - ignore, means repeated input
continue
else #Should be impossible to have zero answers, since all patterns are generated against our set of answers
#simUniverse = makeAttempts(filter(a -> a != attempt, allowed), filteredAnswers, depth + 1)
cumInfogain += numAnswers * (startEntropy - log(2, numAnswers)) #+ sum(values(simUniverse))/length(simUniverse))
n += numAnswers
end
end
if n == 0.0
return 0.0::Float64
else
return cumInfogain/n::Float64
end
end
function rankAttempts(aps::Vector{Tuple{T, Vector{}}}) where T <: AbstractString
uncertainty = Vector{Tuple{Float64, T, T}}(undef, length(aps) * length(aps))
#O(N^2)...
n = 0
@inbounds for i1 in eachindex(aps)
for i2 in Iterators.drop(eachindex(aps), i1)
n += 1
uncertainty[n] = (getSharedUncertainty((aps[i1][2]), (aps[i2][2])), aps[i1][1], aps[i2][1])
end
end
return view(uncertainty, 1:n)
end
#Calculate average shared uncertainty remaining between two Attempt-Pattern universes
function getSharedUncertainty(ap1, ap2) where T <: AbstractString
uncertainty = Vector{Int}(undef, length(ap1) * length(ap2))
n = 1
@inbounds for i in eachindex(ap1)
for j in eachindex(ap2)
uncertainty[n] = length(filter(x -> (x in ap1[i])::Bool, ap2[j])) #Only need amount of intersections
n += 1
end
end
filter!(x -> x != 0, uncertainty) #zero intersections = impossible pattern combo, disregard
return (sum(uncertainty .* broadcast(log, 2, uncertainty)) / sum(uncertainty))::Float64
end
function genAttemptsPatternCombos(attempts::Set{T}, answers::Set{T}) where T<:AbstractString
attemptPatternResults = Vector{Tuple{T, Vector{}}}(undef, length(attempts))
i = 1
@inbounds for attempt in attempts
attemptPatternResults[i] = (attempt, getAttemptAnswers(attempt, answers))
i += 1
end
return attemptPatternResults
end
#Generate sets of sets of possible answers remaining for each possible pattern against our pool of answers
#Is it faster to keep the pattern to compare attempt-pattern combos for possible overlap vs just filtering?
function getAttemptAnswers(attempt::T, answers) where T<:AbstractString
patterns = unique(collect(producePattern(attempt, ans) for ans in answers))
patternOutcomes = Vector{Vector{T}}(undef, length(patterns))
@inbounds for i in eachindex(patterns)
patternOutcomes[i] = filterAnswers(attempt, answers, patterns[i], freqCount)
end
return patternOutcomes
end
#Generate all 3^5 (243) possible permutations of green, yellow, black over 5 spaces
function genAllPatterns()
options = ('g', 'y', 'b')
map(t -> collect(t), unique(collect(Iterators.product(options, options, options, options, options))))
end
#Produces (a) pattern output of an attempt against answer
function producePattern(attempt, answer)
retPattern = ['b', 'b', 'b', 'b', 'b']
#Produce Green, then Yellow - remainder is black
@inbounds for i in eachindex(answer)
if answer[i] == attempt[i]
retPattern[i] = 'g'
continue
end
for j in eachindex(attempt)
if answer[i] == attempt[j] && i != j &&answer[j] != attempt[j] && retPattern[j] == 'b'
retPattern[j] = 'y'
break
end
end
end
return retPattern
end
#Returns true if answer passes requirements vs given attempt and pattern - looks for reasons to return false
function patternTest(attempt::T, pattern, answer::T) where T <: AbstractString
for i in eachindex(attempt)
if (pattern[i] == 'g' && answer[i] != attempt[i]) || (answer[i] == attempt[i] && pattern[i] != 'g')
return false
elseif pattern[i] == 'y' #Yellow = +1 minimum amount elsewhere, ok to have more - fails if too few letters
foundLetter = false
sweepPos = 1
for j in eachindex(answer)
if answer[j] == attempt[i] && pattern[j] != 'g' #Found a match for yellow that isn't green
foundLetter = true
for k = sweepPos:i-1 #Don't worry about previous matches, returns false later if we have too few instances of the letter
if attempt[k] == attempt[i] && pattern[k] == 'y' #Match in position j is already spoken for
sweepPos = k + 1 #Start where we left off next match, not recount
foundLetter = false
break
end
end
end
if foundLetter == true #Found unreserved match
break
end
end
if foundLetter == false
return false
end
elseif pattern[i] == 'b' #Black = no instances except as required by yellow/green - fails if too many occurrences
foundLetter = false
sweepPos = 1
for j in eachindex(answer) #Try to match presence of letter vs green/yellows - if excessive, return false
if attempt[i] == answer[j] && pattern[j] != 'g'
foundLetter = true
for k = sweepPos:5
if attempt[k] == attempt[i] && pattern[k] == 'y'
sweepPos = k + 1
foundLetter = false
break
end
end
end
if foundLetter == true
return false
end
end
end
end
return true
end
#Return all answers given an attempt, pattern, and answers pool
#Most time taken up by filtering - modified Direct Acyclical Word Graph as solution?
@views function filterAnswers(attempt::T, answers::Set{T}, pattern, freqCount::Dict{Char, Float64}) where T <: AbstractString
retAnswers = collect(answers)::Vector{T}
@inbounds for i in eachindex(attempt) #Initializing vs blind push with get + default value is pretty much the same performance
push!(freqCount, attempt[i] => 0.0)
end
@inbounds for i in eachindex(pattern)
l = attempt[i]
if pattern[i] == 'g' #Green - Must be the same
filter!(ans -> ans[i] == l, retAnswers)
freqCount[l] += copysign(1, freqCount[l])
elseif pattern[i] == 'y' #Yellow - Must NOT have one in the specific position
filter!(ans -> ans[i] != l, retAnswers)
freqCount[l] += copysign(1, freqCount[l])
elseif signbit(freqCount[l]) == false #Black - No instances in any position beyond amount required by Green/Yellow, track presence by negative sign
freqCount[l] = copysign(freqCount[l], -1)
end
end
@inbounds for (letter::Char, c::Int) in freqCount
if c == 0 #Letter is Black without another instance marked Green/Yellow
filter!(ans -> !contains(ans, letter), retAnswers)
elseif signbit(c) == false #No black
filter!(ans -> count(l -> l == letter, ans) >= c, retAnswers)
else #Presence of both Black and Yellow/Green constricts frequency to single value
filter!(ans -> count(l -> l == letter, ans) == -c, retAnswers)
end
end
@inbounds for k::Char in keys(freqCount)
delete!(freqCount, k)
end
return retAnswers
end
#Interactive Wordle Solver
function startSolver() #VS Code Julia REPL has a bug with readline... https://github.com/julia-vscode/julia-vscode/issues/785
answers = Set(readdlm(joinpath(baseDir, answerFile), String7))
allowed = Set(union(answers, readdlm(joinpath(baseDir, allowedFile), String7)))
a = 1
cont = true
attempt = ""
pattern = ""
while cont
attempt = getUserAttempt(allowed, a)
pattern = getUserPattern()
answers = Set(filterAnswers(String7(attempt), answers, lowercase(pattern.match), freqCount))
println("$(length(answers)) remaining possible answers: ")
for ans in answers
print("$(ans) ")
end
println()
if length(answers) <= 1
cont = false
println("Solution found!")
break
end
println()
attemptScores = makeAttempts(allowed, answers, 0)
nextAttempt = recommended!(attemptScores, answers)
println("Recommended next attempt: $(nextAttempt)")
println()
a += 1
end
end
#Returns the highest scored attempt; prioritizes attempts that are in the answer set if there is a tie
function recommended!(attemptScores, answers)
attemptScores = sort(collect(attemptScores), by=x->x[2], rev=true)
filter!(att -> att[2] == attemptScores[1][2], attemptScores)
nextAttempt = attemptScores[1]
for (attempt, _) in attemptScores
if attempt in answers
nextAttempt = attempt
break
end
end
return nextAttempt
end
#Writes naive information content of each possible attempt to file
function scoreAttempts()
answers = Set(readdlm(joinpath(baseDir, answerFile), String7))
allowed = Set(union(answers, readdlm(joinpath(baseDir, allowedFile), String7)))
scores = makeAttempts(allowed, answers, 0)
writeSorted("attempt scores.txt", scores, 2)
end
startSolver()
end