Skip to content

Commit aeca532

Browse files
author
Lőrinc
committed
Add test for encoding huge byte sequences
1 parent 1b9faf2 commit aeca532

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

tests/test_encoding.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,16 @@ def test_simple_regex():
 def test_basic_encode():
     enc = tiktoken.get_encoding("r50k_base")
     assert enc.encode("hello world") == [31373, 995]
+    assert enc.encode("a" * 1000) == [24794] * 250
 
     enc = tiktoken.get_encoding("p50k_base")
     assert enc.encode("hello world") == [31373, 995]
+    assert enc.encode("a" * 1000) == [24794] * 250
 
     enc = tiktoken.get_encoding("cl100k_base")
     assert enc.encode("hello world") == [15339, 1917]
     assert enc.encode(" \x850") == [220, 126, 227, 15]
+    assert enc.encode("a" * 1000) == [70540] * 125
 
 
 def test_encode_empty():

0 commit comments

Comments
 (0)