Transformer fundamentals
 
working_gpt.py File Reference


Classes

class  working_gpt.Head
 One head of self-attention.
 
class  working_gpt.MultiHeadAttention
 Multiple heads of self-attention in parallel.
 
class  working_gpt.FeedFoward
 A simple linear layer followed by a non-linearity.
 
class  working_gpt.Block
 Transformer block: communication followed by computation.
 
class  working_gpt.GPTLanguageModel
 Character-level GPT language model assembled from the blocks above.
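These classes follow the standard decoder-only transformer layout: masked self-attention heads for communication between positions, a feed-forward network for per-position computation, and a Block that wraps both in residual connections. The following is a minimal PyTorch sketch of how classes with these names are commonly written, assuming the hyperparameter values listed under Variables below (n_embd, n_head, block_size, dropout); the actual working_gpt.py source may differ in detail.

import torch
import torch.nn as nn
from torch.nn import functional as F

n_embd, n_head, block_size, dropout = 384, 6, 256, 0.2  # values from the Variables section

class Head(nn.Module):
    # One head of causal (masked) self-attention.
    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # lower-triangular mask: each position may only attend to earlier positions
        self.register_buffer("tril", torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, C = x.shape
        k, q, v = self.key(x), self.query(x), self.value(x)           # (B, T, head_size)
        wei = q @ k.transpose(-2, -1) * k.shape[-1] ** -0.5           # scaled dot-product scores
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float("-inf"))  # causal mask
        wei = self.dropout(F.softmax(wei, dim=-1))
        return wei @ v                                                # (B, T, head_size)

class MultiHeadAttention(nn.Module):
    # Several heads run in parallel, concatenated and projected back to n_embd.
    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.dropout(self.proj(torch.cat([h(x) for h in self.heads], dim=-1)))

class FeedFoward(nn.Module):
    # Position-wise MLP: expand, apply a non-linearity, project back (name spelled as in the source).
    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    # Transformer block: communication (attention) then computation (feed-forward),
    # each wrapped in a residual connection with pre-layer-norm.
    def __init__(self, n_embd, n_head):
        super().__init__()
        self.sa = MultiHeadAttention(n_head, n_embd // n_head)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

In this layout, GPTLanguageModel would stack n_layer such Blocks on top of token and position embeddings, followed by a final LayerNorm and a linear head projecting to vocab_size.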
 

Namespaces

namespace  working_gpt
 

Functions

 working_gpt.get_batch (split)
 Sample a random batch of input and target sequences from the train or validation split.
 
 working_gpt.estimate_loss ()
 Average the loss over eval_iters batches on the train and validation splits.
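A hedged sketch of both helpers, assuming the module-level variables listed under Variables below (train_data, val_data, batch_size, block_size, eval_iters, device). The source's estimate_loss() takes no arguments and reads the module-level model; here the model is passed explicitly, and random toy data stands in for the encoded corpus so the snippet is self-contained.

import torch

batch_size, block_size, eval_iters = 64, 256, 200        # values from the Variables section
device = "cpu"                                            # "mps" in the source
data = torch.randint(0, 65, (10_000,), dtype=torch.long)  # stand-in for the encoded text
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

def get_batch(split):
    # Sample batch_size random windows of length block_size; targets are the inputs shifted by one.
    d = train_data if split == "train" else val_data
    ix = torch.randint(len(d) - block_size, (batch_size,))
    x = torch.stack([d[i:i + block_size] for i in ix])
    y = torch.stack([d[i + 1:i + block_size + 1] for i in ix])
    return x.to(device), y.to(device)

@torch.no_grad()
def estimate_loss(model):
    # Average the loss over eval_iters batches for both splits, with the model in eval mode.
    out = {}
    model.eval()
    for split in ("train", "val"):
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            xb, yb = get_batch(split)
            _, loss = model(xb, yb)   # model is assumed to return (logits, loss)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out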
 

Variables

int working_gpt.batch_size = 64
 
int working_gpt.block_size = 256
 
int working_gpt.max_iters = 5000
 
int working_gpt.eval_interval = 500
 
float working_gpt.learning_rate = 3e-4
 
str working_gpt.device = "mps"
 
int working_gpt.eval_iters = 200
 
int working_gpt.n_embd = 384
 
int working_gpt.n_head = 6
 
int working_gpt.n_layer = 6
 
float working_gpt.dropout = 0.2
 
 working_gpt.encoding
 
 working_gpt.text = f.read()
 
 working_gpt.chars = sorted(list(set(text)))
 
 working_gpt.vocab_size = len(chars)
 
dict working_gpt.stoi = {ch: i for i, ch in enumerate(chars)}
 
dict working_gpt.itos = {i: ch for i, ch in enumerate(chars)}
 
 working_gpt.encode
 
 working_gpt.decode
 
 working_gpt.data = torch.tensor(encode(text), dtype=torch.long)
 
 working_gpt.n = int(0.9 * len(data))
 
 working_gpt.train_data = data[:n]
 
 working_gpt.val_data = data[n:]
 
 working_gpt.model = GPTLanguageModel()
 
 working_gpt.m = model.to(device)
 
 working_gpt.optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
 
 working_gpt.losses = estimate_loss()
 
 working_gpt.xb
 
 working_gpt.yb
 
 working_gpt.logits
 
 working_gpt.loss
 
 working_gpt.set_to_none
 
 working_gpt.context = torch.zeros((1, 1), dtype=torch.long, device=device)
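Read together, these variables trace the script end to end: read the training text, build a character-level vocabulary with the encode/decode maps, tensorize and split the data 90/10, then train GPTLanguageModel with AdamW and sample from it. A condensed sketch of that flow, assuming the classes and helpers listed above; the input file name and the generate() call signature are assumptions and may differ from the source.

import torch

# Hyperparameters, as listed above
batch_size, block_size = 64, 256
max_iters, eval_interval, eval_iters = 5000, 500, 200
learning_rate = 3e-4
device = "mps"   # Apple-silicon backend; use "cuda" or "cpu" elsewhere

# Character-level tokenizer built from the training text ("input.txt" is an assumed file name)
with open("input.txt", "r", encoding="utf-8") as f:
    text = f.read()
chars = sorted(list(set(text)))
vocab_size = len(chars)
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}
encode = lambda s: [stoi[c] for c in s]            # string -> list of integer token ids
decode = lambda l: "".join(itos[i] for i in l)     # list of token ids -> string

# 90/10 train/validation split of the encoded corpus
data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

model = GPTLanguageModel()                         # class listed above
m = model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for it in range(max_iters):
    if it % eval_interval == 0 or it == max_iters - 1:
        losses = estimate_loss()                   # averaged train/val loss (see Functions above)
        print(f"step {it}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    xb, yb = get_batch("train")                    # sample a training batch
    logits, loss = model(xb, yb)                   # forward pass returns logits and cross-entropy loss
    optimizer.zero_grad(set_to_none=True)          # set_to_none avoids allocating zero tensors
    loss.backward()
    optimizer.step()

# Generate text from the trained model, starting from a single zero token
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))  # generate() signature is an assumption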