Transformer fundamentals
 
working_gpt.py File Reference


Classes

class  working_gpt.Head
 One head of self-attention.
 
class  working_gpt.MultiHeadAttention
 Multiple heads of self-attention in parallel.
 
class  working_gpt.FeedFoward
 A simple linear layer followed by a non-linearity.
 
class  working_gpt.Block
 Transformer block: communication followed by computation.
 
class  working_gpt.GPTLanguageModel
 Character-level GPT language model assembled from the blocks above.
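These classes follow the standard decoder-only transformer layout: masked self-attention heads for communication between positions, a feed-forward network for per-position computation, and a Block that wraps both in residual connections. The following is a minimal PyTorch sketch of how classes with these names are commonly written, assuming the hyperparameter values listed under Variables below (n_embd, n_head, block_size, dropout); the actual working_gpt.py source may differ in detail.

import torch
import torch.nn as nn
from torch.nn import functional as F

n_embd, n_head, block_size, dropout = 384, 6, 256, 0.2  # values from the Variables section

class Head(nn.Module):
    # One head of causal (masked) self-attention.
    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # lower-triangular mask: each position may only attend to earlier positions
        self.register_buffer("tril", torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, C = x.shape
        k, q, v = self.key(x), self.query(x), self.value(x)           # (B, T, head_size)
        wei = q @ k.transpose(-2, -1) * k.shape[-1] ** -0.5           # scaled dot-product scores
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float("-inf"))  # causal mask
        wei = self.dropout(F.softmax(wei, dim=-1))
        return wei @ v                                                # (B, T, head_size)

class MultiHeadAttention(nn.Module):
    # Several heads run in parallel, concatenated and projected back to n_embd.
    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.dropout(self.proj(torch.cat([h(x) for h in self.heads], dim=-1)))

class FeedFoward(nn.Module):
    # Position-wise MLP: expand, apply a non-linearity, project back (name spelled as in the source).
    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    # Transformer block: communication (attention) then computation (feed-forward),
    # each wrapped in a residual connection with pre-layer-norm.
    def __init__(self, n_embd, n_head):
        super().__init__()
        self.sa = MultiHeadAttention(n_head, n_embd // n_head)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

In this layout, GPTLanguageModel would stack n_layer such Blocks on top of token and position embeddings, followed by a final LayerNorm and a linear head projecting to vocab_size.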
 

Namespaces

namespace  working_gpt
 

Functions

 working_gpt.get_batch (split)
 Sample a random batch of input and target sequences from the train or validation split.
 
 working_gpt.estimate_loss ()
 Average the loss over eval_iters batches on the train and validation splits.
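A hedged sketch of both helpers, assuming the module-level variables listed under Variables below (train_data, val_data, batch_size, block_size, eval_iters, device). The source's estimate_loss() takes no arguments and reads the module-level model; here the model is passed explicitly, and random toy data stands in for the encoded corpus so the snippet is self-contained.

import torch

batch_size, block_size, eval_iters = 64, 256, 200        # values from the Variables section
device = "cpu"                                            # "mps" in the source
data = torch.randint(0, 65, (10_000,), dtype=torch.long)  # stand-in for the encoded text
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

def get_batch(split):
    # Sample batch_size random windows of length block_size; targets are the inputs shifted by one.
    d = train_data if split == "train" else val_data
    ix = torch.randint(len(d) - block_size, (batch_size,))
    x = torch.stack([d[i:i + block_size] for i in ix])
    y = torch.stack([d[i + 1:i + block_size + 1] for i in ix])
    return x.to(device), y.to(device)

@torch.no_grad()
def estimate_loss(model):
    # Average the loss over eval_iters batches for both splits, with the model in eval mode.
    out = {}
    model.eval()
    for split in ("train", "val"):
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            xb, yb = get_batch(split)
            _, loss = model(xb, yb)   # model is assumed to return (logits, loss)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out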
 

Variables

int working_gpt.batch_size = 64
 
int working_gpt.block_size = 256
 
int working_gpt.max_iters = 5000
 
int working_gpt.eval_interval = 500
 
float working_gpt.learning_rate = 3e-4
 
str working_gpt.device = "mps"
 
int working_gpt.eval_iters = 200
 
int working_gpt.n_embd = 384
 
int working_gpt.n_head = 6
 
int working_gpt.n_layer = 6
 
float working_gpt.dropout = 0.2
 
 working_gpt.encoding
 
 working_gpt.text = f.read()
 
 working_gpt.chars = sorted(list(set(text)))
 
 working_gpt.vocab_size = len(chars)
 
dict working_gpt.stoi = {ch: i for i, ch in enumerate(chars)}
 
dict working_gpt.itos = {i: ch for i, ch in enumerate(chars)}
 
 working_gpt.encode
 
 working_gpt.decode
 
 working_gpt.data = torch.tensor(encode(text), dtype=torch.long)
 
 working_gpt.n = int(0.9 * len(data))
 
 working_gpt.train_data = data[:n]
 
 working_gpt.val_data = data[n:]
 
 working_gpt.model = GPTLanguageModel()
 
 working_gpt.m = model.to(device)
 
 working_gpt.optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
 
 working_gpt.losses = estimate_loss()
 
 working_gpt.xb
 
 working_gpt.yb
 
 working_gpt.logits
 
 working_gpt.loss
 
 working_gpt.set_to_none
 
 working_gpt.context = torch.zeros((1, 1), dtype=torch.long, device=device)
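Read together, these variables trace the script end to end: read the training text, build a character-level vocabulary with the encode/decode maps, tensorize and split the data 90/10, then train GPTLanguageModel with AdamW and sample from it. A condensed sketch of that flow, assuming the classes and helpers listed above; the input file name and the generate() call signature are assumptions and may differ from the source.

import torch

# Hyperparameters, as listed above
batch_size, block_size = 64, 256
max_iters, eval_interval, eval_iters = 5000, 500, 200
learning_rate = 3e-4
device = "mps"   # Apple-silicon backend; use "cuda" or "cpu" elsewhere

# Character-level tokenizer built from the training text ("input.txt" is an assumed file name)
with open("input.txt", "r", encoding="utf-8") as f:
    text = f.read()
chars = sorted(list(set(text)))
vocab_size = len(chars)
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}
encode = lambda s: [stoi[c] for c in s]            # string -> list of integer token ids
decode = lambda l: "".join(itos[i] for i in l)     # list of token ids -> string

# 90/10 train/validation split of the encoded corpus
data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

model = GPTLanguageModel()                         # class listed above
m = model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for it in range(max_iters):
    if it % eval_interval == 0 or it == max_iters - 1:
        losses = estimate_loss()                   # averaged train/val loss (see Functions above)
        print(f"step {it}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    xb, yb = get_batch("train")                    # sample a training batch
    logits, loss = model(xb, yb)                   # forward pass returns logits and cross-entropy loss
    optimizer.zero_grad(set_to_none=True)          # set_to_none avoids allocating zero tensors
    loss.backward()
    optimizer.step()

# Generate text from the trained model, starting from a single zero token
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))  # generate() signature is an assumption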