diff --git a/.gitignore b/.gitignore index 68bc17f..aaa41a2 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +/openwebtext +/data_extract.py diff --git a/eval_split.txt b/eval_split.txt new file mode 100644 index 0000000..1fabb28 Binary files /dev/null and b/eval_split.txt differ diff --git a/train_split.txt b/train_split.txt new file mode 100644 index 0000000..7f16f08 Binary files /dev/null and b/train_split.txt differ diff --git a/vocab.txt b/vocab.txt new file mode 100644 index 0000000..89e9b72 Binary files /dev/null and b/vocab.txt differ