Transformer fundamentals
 
Loading...
Searching...
No Matches
gpt.py
Go to the documentation of this file.
1import os
2
3import torch
4from torch.nn import functional as F
5
# ---------------------------------------------------------------------------
# Hyperparameters
# ---------------------------------------------------------------------------
batch_size = 64      # independent sequences processed in parallel per step
block_size = 256     # maximum context length used for prediction
max_iters = 5000
eval_interval = 500
learning_rate = 3e-4

# Device priority: Apple-Silicon MPS first, then CUDA, otherwise plain CPU
# (AMD/ROCm is not handled here).
device = (
    "mps" if torch.backends.mps.is_available() and torch.backends.mps.is_built()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

eval_iters = 200
n_embd = 384
n_head = 6
n_layer = 6
dropout = 0.2
# -----------------------------------------------------------------------------
26
# Fix the RNG seed so runs are reproducible.
torch.manual_seed(1337)

# The training corpus lives one directory above this script as "input.txt".
input_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, "input.txt")
)
with open(input_path, encoding="utf-8") as f:
    text = f.read()
32
# All the unique characters that occur in the text, in sorted order.
# sorted() already returns a list, so wrapping set() in list() is redundant.
chars = sorted(set(text))
vocab_size = len(chars)

# Bidirectional character <-> integer token-id mappings.
stoi = {ch: i for i, ch in enumerate(chars)}  # character -> token id
itos = {i: ch for i, ch in enumerate(chars)}  # token id -> character


def encode(s):
    """Encode a string into a list of integer token ids."""
    return [stoi[c] for c in s]


def decode(l):
    """Decode a list of integer token ids back into a string."""
    return "".join(itos[i] for i in l)
42
43# Train and test splits