Skip to content

Commit 98ee732

Browse files
committed
filter obviously bad tweets
1 parent 8d28c8d commit 98ee732

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

TwitterWordle.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ def flatten_list(list_of_lists):
1010
return [y for x in list_of_lists for y in x]
1111

1212

13+
def check_match(x):
    """Return True when *x* contains a Wordle share header like ``Wordle 213``.

    The pattern is the literal word ``Wordle``, one space, then three
    digits, searched anywhere in the text (it is not anchored).

    Args:
        x: Candidate tweet text. Anything that is not a ``str`` (for
            example ``None`` or a float NaN standing in for missing
            text) is treated as a non-match.

    Returns:
        bool: True if the header pattern is found, otherwise False.
    """
    # This function is applied element-wise over a DataFrame text column;
    # re.search raises TypeError on non-string values, which would abort the
    # whole filter, so reject them up front instead.
    if not isinstance(x, str):
        return False
    return re.search(r"Wordle \d{3}", x) is not None
17+
18+
1319
class TwitterWordle():
1420
def __init__(self, tweet_df=None, use_limited_targets=True):
1521
if use_limited_targets:
@@ -24,6 +30,9 @@ def __init__(self, tweet_df=None, use_limited_targets=True):
2430
if tweet_df is not None:
2531
assert isinstance(tweet_df, pd.DataFrame), 'Must be a dataframe'
2632
self.tweet_df = tweet_df
33+
if self.tweet_df is not None:
34+
self.tweet_df = self.tweet_df.loc[tweet_df['tweet_text'].apply(
35+
check_match)]
2736

2837
@staticmethod
2938
def process_counter(target_dictionary, c, penalty_term=-5E7, min_count=5):
@@ -62,9 +71,12 @@ def extract_all_guesses(self, wordle_num, downsample=None, verbose=True):
6271
print(
6372
f"{len(self.tweet_df.query(f'wordle_id == {wordle_num}'))} tweets for wordle {wordle_num}"
6473
)
65-
return flatten_list(
66-
(self.tweet_df.query(f'wordle_id == {wordle_num}')
67-
['tweet_text'].apply(self.wordle_guesses)).tolist())
74+
return flatten_list([
75+
x
76+
for x in (self.tweet_df.query(f'wordle_id == {wordle_num}')
77+
['tweet_text'].apply(self.wordle_guesses)).tolist()
78+
if len(x) <= 6
79+
])
6880

6981
return flatten_list((self.tweet_df.query(f'wordle_id == {wordle_num}')
7082
['tweet_text'].apply(self.wordle_guesses)).sample(
@@ -93,6 +105,7 @@ def solve_guess_list(self,
93105
the_guesses = [
94106
x for x in all_guesses if x not in ('22222', '00000')
95107
]
108+
96109
c = Counter(the_guesses)
97110
if not min_count:
98111
min_count = np.floor(np.quantile(list(c.values()), .25))
@@ -136,7 +149,7 @@ def solve(self,
136149
elif tweet_list:
137150
print(f"{len(tweet_list)} tweets")
138151
score_guess_list = flatten_list(
139-
[self.wordle_guesses(x) for x in tweet_list])
152+
[self.wordle_guesses(x) for x in [x for x in tweet_list if check_match(x)]])
140153

141154
prediction, sigma, data, delta_above_two = self.solve_guess_list(
142155
score_guess_list,
@@ -148,12 +161,15 @@ def solve(self,
148161
print(
149162
f'Wordle {wordle_num} initial signal low {delta_above_two:1.3}. Iterating for better parameters'
150163
)
164+
151165
for my_min_count in range(max(min_count - 2, 1), min_count + 10,
152166
2):
153167
if delta_above_two > 1.1:
154168
continue
155169

156170
for p in range(-7, -100, -2):
171+
print(".", end="")
172+
157173
penalty_term = p * 1E7
158174
if delta_above_two > 1.1:
159175
continue
@@ -167,7 +183,7 @@ def solve(self,
167183
iterated_results.append(
168184
(prediction, sigma, data, delta_above_two))
169185
print(
170-
f"Iterated to a better signal with min_count {final_min_count} and penalty {final_penalty_term:.2E}"
186+
f"\nIterated to a better signal with min_count {final_min_count} and penalty {final_penalty_term:.2E}"
171187
)
172188
if delta_above_two < 1.1 and iterate_low_score:
173189
prediction, sigma, data, delta_above_two = sorted(

0 commit comments

Comments
 (0)