@@ -10,6 +10,12 @@ def flatten_list(list_of_lists):
10
10
return [y for x in list_of_lists for y in x ]
11
11
12
12
13
def check_match(x):
    """Return True when *x* contains a Wordle header such as "Wordle 123".

    The pattern is searched anywhere in the text: the literal word
    "Wordle", a single space, then three digits.

    Args:
        x: Text to inspect. Non-string values (for example ``None`` or the
            float NaN that pandas uses for missing text) are treated as
            non-matching instead of raising ``TypeError``.

    Returns:
        bool: True if the pattern is found, otherwise False.
    """
    # re.search raises TypeError on non-string input; a missing value should
    # simply count as "no match" so that filtering a column does not crash.
    if not isinstance(x, str):
        return False
    return re.search(r"Wordle \d{3}", x) is not None
17
+
18
+
13
19
class TwitterWordle ():
14
20
def __init__ (self , tweet_df = None , use_limited_targets = True ):
15
21
if use_limited_targets :
@@ -24,6 +30,9 @@ def __init__(self, tweet_df=None, use_limited_targets=True):
24
30
if tweet_df is not None :
25
31
assert isinstance (tweet_df , pd .DataFrame ), 'Must be a dataframe'
26
32
self .tweet_df = tweet_df
33
+ if self .tweet_df is not None :
34
+ self .tweet_df = self .tweet_df .loc [tweet_df ['tweet_text' ].apply (
35
+ check_match )]
27
36
28
37
@staticmethod
29
38
def process_counter (target_dictionary , c , penalty_term = - 5E7 , min_count = 5 ):
@@ -62,9 +71,12 @@ def extract_all_guesses(self, wordle_num, downsample=None, verbose=True):
62
71
print (
63
72
f"{ len (self .tweet_df .query (f'wordle_id == { wordle_num } ' ))} tweets for wordle { wordle_num } "
64
73
)
65
- return flatten_list (
66
- (self .tweet_df .query (f'wordle_id == { wordle_num } ' )
67
- ['tweet_text' ].apply (self .wordle_guesses )).tolist ())
74
+ return flatten_list ([
75
+ x
76
+ for x in (self .tweet_df .query (f'wordle_id == { wordle_num } ' )
77
+ ['tweet_text' ].apply (self .wordle_guesses )).tolist ()
78
+ if len (x ) <= 6
79
+ ])
68
80
69
81
return flatten_list ((self .tweet_df .query (f'wordle_id == { wordle_num } ' )
70
82
['tweet_text' ].apply (self .wordle_guesses )).sample (
@@ -93,6 +105,7 @@ def solve_guess_list(self,
93
105
the_guesses = [
94
106
x for x in all_guesses if x not in ('22222' , '00000' )
95
107
]
108
+
96
109
c = Counter (the_guesses )
97
110
if not min_count :
98
111
min_count = np .floor (np .quantile (list (c .values ()), .25 ))
@@ -136,7 +149,7 @@ def solve(self,
136
149
elif tweet_list :
137
150
print (f"{ len (tweet_list )} tweets" )
138
151
score_guess_list = flatten_list (
139
- [self .wordle_guesses (x ) for x in tweet_list ])
152
+ [self .wordle_guesses (x ) for x in [ x for x in tweet_list if check_match ( x )] ])
140
153
141
154
prediction , sigma , data , delta_above_two = self .solve_guess_list (
142
155
score_guess_list ,
@@ -148,12 +161,15 @@ def solve(self,
148
161
print (
149
162
f'Wordle { wordle_num } initial signal low { delta_above_two :1.3} . Iterating for better parameters'
150
163
)
164
+
151
165
for my_min_count in range (max (min_count - 2 , 1 ), min_count + 10 ,
152
166
2 ):
153
167
if delta_above_two > 1.1 :
154
168
continue
155
169
156
170
for p in range (- 7 , - 100 , - 2 ):
171
+ print ("." , end = "" )
172
+
157
173
penalty_term = p * 1E7
158
174
if delta_above_two > 1.1 :
159
175
continue
@@ -167,7 +183,7 @@ def solve(self,
167
183
iterated_results .append (
168
184
(prediction , sigma , data , delta_above_two ))
169
185
print (
170
- f"Iterated to a better signal with min_count { final_min_count } and penalty { final_penalty_term :.2E} "
186
+ f"\n Iterated to a better signal with min_count { final_min_count } and penalty { final_penalty_term :.2E} "
171
187
)
172
188
if delta_above_two < 1.1 and iterate_low_score :
173
189
prediction , sigma , data , delta_above_two = sorted (
0 commit comments