gpt-tokenizer

v2.5.0
A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models
BPE encoder decoder tokenizer GPT GPT-2 GPT-3 GPT-3.5 GPT-4 and 8 more...

test

yarn test:format && yarn test:types && yarn test:lint && yarn test:code

build

yarn build:cjs && yarn build:esm && yarn build:umd

clean

git clean -dfX --exclude=node_modules src && beemo typescript:sync-project-refs

format

yarn rrun prettier --write "./{src,tests,.config}/**/!(*.d).{js,jsx,ts,tsx,json,md}"

prepare

rrun husky install .config/husky && beemo create-config

release

beemo run-script release

build:cjs

yarn rrun tsc --outDir cjs --module commonjs --target es2022 --project tsconfig-cjs.json

build:esm

mkdir -p esm && echo '{"name": "gpt-tokenizer", "type": "module"}' > ./esm/package.json && yarn rrun tsc --outDir esm --module esnext --target es2022

build:umd

yarn build:umd:cl100k_base && yarn build:umd:p50k_base && yarn build:umd:p50k_edit && yarn build:umd:r50k_base && yarn build:umd:o200k_base

test:code

rrun jest

test:lint

rrun eslint 'src/*.{js,jsx,ts,tsx}'

test:types

yarn rrun tsc --noEmit

test:format

yarn rrun prettier --check "./{src,tests,.config}/**/!(*.d).{js,jsx,ts,tsx,json,md}"

codegen:models

yarn tsx src/codegen/generateByModel.ts

postinstallDev

yarn prepare

codegen:encodings

yarn tsx src/codegen/generateJsEncodings.ts

build:umd:p50k_base

beemo webpack --entry='./src/encoding/p50k_base.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_p50k_base' --env 'filename=p50k_base.js'

build:umd:p50k_edit

beemo webpack --entry='./src/encoding/p50k_edit.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_p50k_edit' --env 'filename=p50k_edit.js'

build:umd:r50k_base

beemo webpack --entry='./src/encoding/r50k_base.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_r50k_base' --env 'filename=r50k_base.js'

build:umd:o200k_base

beemo webpack --entry='./src/encoding/o200k_base.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_o200k_base' --env 'filename=o200k_base.js'

build:umd:cl100k_base

beemo webpack --entry='./src/main.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_cl100k_base' --env 'filename=cl100k_base.js'

Metadata

  • MIT
  • Whatever
  • Bazyli Brzoska
  • released 10/9/2024

Downloads

Maintainers