Classes | |
class | Block |
Transformer block: communication followed by computation. More... | |
class | FeedFoward |
a simple linear layer followed by a non-linearity More... | |
class | GPTLanguageModel |
class | Head |
one head of self-attention More... | |
class | MultiHeadAttention |
multiple heads of self-attention in parallel More... | |
Functions | |
get_batch (split) | |
estimate_loss () | |
Variables | |
int | batch_size = 64 |
int | block_size = 256 |
int | max_iters = 5000 |
int | eval_interval = 500 |
float | learning_rate = 3e-4 |
str | device = "mps" |
int | eval_iters = 200 |
int | n_embd = 384 |
int | n_head = 6 |
int | n_layer = 6 |
float | dropout = 0.2 |
encoding | |
text = f.read() | |
chars = sorted(list(set(text))) | |
vocab_size = len(chars) | |
dict | stoi = {ch: i for i, ch in enumerate(chars)} |
dict | itos = {i: ch for i, ch in enumerate(chars)} |
encode | |
str | decode |
data = torch.tensor(encode(text), dtype=torch.long) | |
n = int(0.9 * len(data)) | |
train_data = data[:n] | |
val_data = data[n:] | |
model = GPTLanguageModel() | |
m = model.to(device) | |
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) | |
losses = estimate_loss() | |
xb | |
yb | |
logits | |
loss | |
set_to_none | |
context = torch.zeros((1, 1), dtype=torch.long, device=device) | |
working_gpt.estimate_loss | ( | ) |
Definition at line 66 of file working_gpt.py.
References get_batch(), and model.
working_gpt.get_batch | ( | split | ) |
Definition at line 55 of file working_gpt.py.
Referenced by estimate_loss().
int working_gpt.batch_size = 64 |
Definition at line 6 of file working_gpt.py.
int working_gpt.block_size = 256 |
Definition at line 7 of file working_gpt.py.
working_gpt.chars = sorted(list(set(text))) |
Definition at line 35 of file working_gpt.py.
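working_gpt.context = torch.zeros((1, 1), dtype=torch.long, device=device) |
Definition at line 250 of file working_gpt.py.
working_gpt.data = torch.tensor(encode(text), dtype=torch.long) |
Definition at line 48 of file working_gpt.py.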
str working_gpt.decode |
Definition at line 43 of file working_gpt.py.
str working_gpt.device = "mps" |
Definition at line 13 of file working_gpt.py.
float working_gpt.dropout = 0.2 |
Definition at line 25 of file working_gpt.py.
working_gpt.encode |
Definition at line 40 of file working_gpt.py.
working_gpt.encoding |
Definition at line 31 of file working_gpt.py.
int working_gpt.eval_interval = 500 |
Definition at line 9 of file working_gpt.py.
int working_gpt.eval_iters = 200 |
Definition at line 21 of file working_gpt.py.
dict working_gpt.itos = {i: ch for i, ch in enumerate(chars)} |
Definition at line 39 of file working_gpt.py.
float working_gpt.learning_rate = 3e-4 |
Definition at line 10 of file working_gpt.py.
working_gpt.logits |
Definition at line 244 of file working_gpt.py.
working_gpt.loss |
Definition at line 244 of file working_gpt.py.
working_gpt.losses = estimate_loss() |
Definition at line 235 of file working_gpt.py.
working_gpt.m = model.to(device) |
Definition at line 224 of file working_gpt.py.
int working_gpt.max_iters = 5000 |
Definition at line 8 of file working_gpt.py.
working_gpt.model = GPTLanguageModel() |
Definition at line 223 of file working_gpt.py.
Referenced by estimate_loss().
working_gpt.n = int(0.9 * len(data)) |
Definition at line 49 of file working_gpt.py.
int working_gpt.n_embd = 384 |
Definition at line 22 of file working_gpt.py.
int working_gpt.n_head = 6 |
Definition at line 23 of file working_gpt.py.
int working_gpt.n_layer = 6 |
Definition at line 24 of file working_gpt.py.
working_gpt.optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) |
Definition at line 229 of file working_gpt.py.
working_gpt.set_to_none |
Definition at line 245 of file working_gpt.py.
dict working_gpt.stoi = {ch: i for i, ch in enumerate(chars)} |
Definition at line 38 of file working_gpt.py.
working_gpt.text = f.read() |
Definition at line 32 of file working_gpt.py.
working_gpt.train_data = data[:n] |
Definition at line 50 of file working_gpt.py.
working_gpt.val_data = data[n:] |
Definition at line 51 of file working_gpt.py.
working_gpt.vocab_size = len(chars) |
Definition at line 36 of file working_gpt.py.
working_gpt.xb |
Definition at line 241 of file working_gpt.py.
working_gpt.yb |
Definition at line 241 of file working_gpt.py.