��/��/��ţ

��

从零开始实现VAE和CVAE

��Դ�� - deephub

��ɢģ�Ϳ��Կ��һ��κ��VAE(��Ա��)��ǰ��forward��Ϊ��򣩵Ĺ��̣�ͨ��ڶ��߶��ݷֲ��Ȼ��Ƿ��Ĺ��̣�ȥѧϰ��λָ��ݽṹ��ƻ��ͻָ��̷ֱ��Ӧ��VAE�еı��ͽ��̡��VAE��һ��Ҫ�ĸ��Ҫ��գ��Ľ��python��ͷ��ʼʵ��VAE��CVAE��Ӷ��ǵ��⡣

什么是自编码器?它们的作用是什么

�Ա��һ��ɱ��ͽ��ɵ��ϵͳ�ṹ��ڱ��֮��м��ν��ز㣬��и��ָ��ƣ��ʱ��Գ�Ϊƿ��㡢Ǳ�ڿռ䡢��ز㡢��㡣��:

�Ա��Ӧ��ڸ��;�ϡ����ľ��ѹ��:��ź�ͨ��ʱ��ͼ��Ǳ�ڱ�ʾ�ڳߴ��ҪС�öࡣ��磬��ͼ�У��Ȼ��ź��8��ֵ��ʾ��ѹ��ʾֻ��Ҫ3��ֵ��

�Ա��Ҳ��ڸ��Ŀ��:��ȥ�룬��ѧϰ��쳣��⣬�Լ��ڴ��ȶ��ɢģ�͡�

自编码器实现

��ǽ�ʹ��MNIST��ݼ��Ҫ��MNIST��ص��ļ��У��:

# Fetch the four MNIST archives and decode each one into a NumPy array
url = "http://yann.lecun.com/exdb/mnist/"
filenames = [
    'train-images-idx3-ubyte.gz',
    'train-labels-idx1-ubyte.gz',
    't10k-images-idx3-ubyte.gz',
    't10k-labels-idx1-ubyte.gz',
]

data = []
for filename in filenames:
    print("Downloading", filename)
    request.urlretrieve(url + filename, filename)
    with gzip.open(filename, 'rb') as f:
        raw = f.read()
    if 'labels' in filename:
        # Label archive: skip the 8 header bytes, one uint8 per label
        data.append(np.frombuffer(raw, np.uint8, offset=8))
    else:
        # Image archive: skip the 16 header bytes, one row of 28*28 pixels per image
        data.append(np.frombuffer(raw, np.uint8, offset=16).reshape(-1, 28 * 28))

# The order of `filenames` fixes the order of this unpacking
X_train, y_train, X_test, y_test = data

# Rescale the uint8 pixels to float32 values in [0, 1]
X_train, X_test = (arr.astype(np.float32) / 255.0 for arr in (X_train, X_test))

# Widen the uint8 labels to int64
y_train, y_test = (arr.astype(np.int64) for arr in (y_train, y_test))

��ѵ��Ͳ��Լ��ǿ��ͼ��:

def show_images(images, labels):
    """
    Display a single row of 28x28 grayscale images with their labels.

    Args:
        images: Array whose contents can be reshaped to ``(-1, 28, 28)``,
            for example ``(N, 784)`` flattened images.
        labels: Indexable sequence; ``labels[i]`` is shown in the title of
            image ``i``.

    Returns:
        None. When ``images`` is empty nothing is plotted.
    """
    # Nothing to draw; plt.subplots would reject a zero-column grid
    if len(images) == 0:
        return

    pixels = images.reshape(-1, 28, 28)
    num_images = len(pixels)

    # squeeze=False keeps `axs` two-dimensional even for a single image, so
    # the indexing below works for any number of images
    fig, axs = plt.subplots(
        ncols=num_images, nrows=1, figsize=(10, 3 * num_images), squeeze=False
    )

    for i, (ax, image) in enumerate(zip(axs[0], pixels)):
        ax.imshow(image, cmap="gray")
        ax.set_title("Label: {}".format(labels[i]))
        # Hide the tick marks; the x-axis label carries the image position
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlabel("Index: {}".format(i))

    fig.subplots_adjust(hspace=0.5)
    plt.show()

��Ϊ��ݱȽϼ򵥣��ֱ��ʹ��Բ㣬��ǽ��м��㣺

import torch.nn as nn

class AutoEncoder(nn.Module):
    """Fully connected autoencoder for flattened 784-value inputs.

    The encoder maps 784 -> 256 -> ``num_hidden`` with ReLU activations, and
    the decoder mirrors it (``num_hidden`` -> 256 -> 784), ending in a sigmoid
    so reconstructions lie in (0, 1).

    Args:
        num_hidden: Width of the bottleneck (latent) layer. Defaults to 8.
    """

    def __init__(self, num_hidden=8):
        super().__init__()
        # Size of the bottleneck; subclasses read this attribute
        self.num_hidden = num_hidden

        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, self.num_hidden),
            nn.ReLU(),
        )

        self.decoder = nn.Sequential(
            nn.Linear(self.num_hidden, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),  # squash the output into (0, 1)
        )

    def forward(self, x):
        """Return ``(encoded, decoded)`` for a batch ``x`` of shape ``(..., 784)``."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

ѵ��ʱ��ǲ��Ҫͼ��ǩ��Ϊ��һ��޼ල�ķ��ѡ��ʹ�ü򵥵ľ��ʧ��Ϊ��ȷ�ķ�ʽ�ؽ��ǵ�ͼ��һЩ׼��:

# Convert the training data to a PyTorch tensor
X_train = torch.from_numpy(X_train)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model and move it to the selected device before creating the optimizer
model = AutoEncoder()
model.to(device)

# NOTE(review): learning_rate and batch_size are not defined in this snippet;
# they must already be set before this point
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Mean squared error between the reconstruction and the input
criterion = nn.MSELoss()

# Serve the training data in shuffled mini-batches
train_loader = torch.utils.data.DataLoader(
    X_train, batch_size=batch_size, shuffle=True
)

��ѵ��ѭ��Ҳ�ܱ�׼��

# Optimise the autoencoder for `num_epochs` passes over the training data
for epoch in range(num_epochs):
    total_loss = 0.0

    for batch_idx, data in enumerate(train_loader):
        data = data.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Reconstruct the batch and score it against the input itself
        encoded, decoded = model(data)
        loss = criterion(decoded, data)

        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean, so weight it by the batch size
        total_loss += data.size(0) * loss.item()

    # Average loss per training sample for this epoch
    epoch_loss = total_loss / len(train_loader.dataset)
    print("Epoch {}/{}: loss={:.4f}".format(epoch + 1, num_epochs, epoch_loss))

Ϊ�˼��ʧ��ͼ��ؽ��ͼ��ֱ�ӽ��н��бȽϾͿ��ˡ�ѵ��ٶȻ�ܿ죬��CPU��ɡ��ѵ��ɺ󣬱Ƚ��ͼ��:

��һ��ԭʼͼ��һ��ؽ�ͼ��

��м��⣺

1��ؽ��ͼ��ģ��Ϊ�ؽ��

2��͵�ѹ��ѵģ��ڽ��л��⣬3��ֻʹ��8��ص�Ԫ��ص�Ԫ��ͼ��ǻ�ʹģ��ء�

��32��ص�Ԫ�Ľ��

�Ա��ݱ��ֵû��һ��⣬��ݷǳ��ѡ��ȥ��֣�ֻ��Ǳ�ڲ㿪ʼ��Ӧ��ܹ��õ�һ��ͼ�񡣵��Ƕ��Ա��˵��û�з��ķ�ʽ��Ǳ�ڿռ��в��һ�ֿɿ��Ĳ��ԣ��ȷ��ͼ��ǿɶ��ģ��һ��ᷢ��һ��ı仯��

��Ҫ��Ǵ��Ǳ�ڿռ��һ��Ǳ�ڿռ�ֲ��е��ѣ��ǴӾ��ǵõ��Ľ��:

��Ȼ��Щ��ܺã��Ҫ��ͬһ�ռ��Ĳ��ø��ѣ��Ϊ�ÿռ��ά��ߡ��磬��ǽ�ά��ӵ�32��:

��Ѿ��޷��ˣ��û�и��õİ취��?

变分自编码器 VAE

��Ա��(VAEs)��Ϊ��Auto-Encoding Variational Bayes��Diederik P. Kingma��Max Welling��2014�귢��

VAEsΪ��ṩ��һ�ָ��ķ��ѧϰ��Ǳ�ڱ�ʾ��˼��ܼ�:��ѧϰ��Ǳ�ڿռ�ֲ��Ĳ��Ǿ��ֵ��Ǳ�ڱ��ʱ��ֱ�Ӵ�Ǳ�ڱ�ʾ�л�ȡ��ʹ��Ǳ�ڿռ�ֲ��Ǳ�ڱ�ʾ��

��ʹ��ѧϰ��ֵ�ͷ��Ժ�ʹ��ǶԽ�Ҫ�ý��Ǳ�ڱ��в��

��ǲ��ǲ��΢�ġ��ʹ��һ�ֽ��²��ļ��ɡ��Ĺ��ԭ��:��ǲ��ٴ��Ǹ��л�ȡ��ȷ��ѧϰ��ֵ�ͷ��Ȼ��һ��Ŀ飬ֻ��в��

��ˣ��ǲ�ֱ�Ӷ��Щ�ֲ��г��²��:

~��ʾǱ�ڱ�ʾ��һ��ɲ��֡��ڵ�ģ�ͱ��

��ģ�ʹ��Ҳ��֮�ı䣺

class VAE(AutoEncoder):
    """Variational autoencoder built on the ``AutoEncoder`` encoder/decoder.

    Two extra linear layers turn the encoder output into the mean and
    log-variance of a diagonal Gaussian over the latent space. The decoder
    then receives a sample from that distribution rather than the encoder
    output itself.
    """

    def __init__(self):
        super().__init__()
        # Heads producing the latent mean and log-variance
        self.mu = nn.Linear(self.num_hidden, self.num_hidden)
        self.log_var = nn.Linear(self.num_hidden, self.num_hidden)

    def reparameterize(self, mu, log_var):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)``.

        Sampling ``eps`` separately keeps the result differentiable with
        respect to ``mu`` and ``log_var``.
        """
        std = torch.exp(0.5 * log_var)  # log-variance -> standard deviation
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Return ``(encoded, decoded, mu, log_var)`` for the batch ``x``."""
        encoded = self.encoder(x)
        mu = self.mu(encoded)
        log_var = self.log_var(encoded)
        z = self.reparameterize(mu, log_var)
        decoded = self.decoder(z)
        return encoded, decoded, mu, log_var

    def sample(self, num_samples):
        """Decode ``num_samples`` latent vectors drawn from a standard normal.

        The noise is moved to the device holding the model's parameters, so
        the method does not depend on any module-level ``device`` variable.
        """
        model_device = self.mu.weight.device
        with torch.no_grad():
            z = torch.randn(num_samples, self.num_hidden).to(model_device)
            samples = self.decoder(z)
        return samples

��ѵ��ģ��أ��ʧ��:

# Loss combining binary cross-entropy reconstruction error with a KL-divergence term
def loss_function(recon_x, x, mu, logvar):
    """Return the summed BCE reconstruction loss plus the KL divergence.

    Args:
        recon_x: Reconstructed batch with values in [0, 1].
        x: Target batch; reshaped to the shape of ``recon_x`` before
            comparison, so e.g. ``(B, 1, 28, 28)`` images can be scored
            against ``(B, 784)`` reconstructions.
        mu: Latent means.
        logvar: Latent log-variances.

    Returns:
        Scalar tensor ``BCE + KLD``, both summed over the whole batch.
    """
    # reshape (rather than view) also accepts non-contiguous targets and does
    # not hard-code the 784-pixel MNIST size
    target = x.reshape(recon_x.shape)
    BCE = F.binary_cross_entropy(recon_x, target, reduction="sum")

    # Closed-form KL divergence between N(mu, exp(logvar)) and N(0, I)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD

��һ��Ѿ��Ϥ�ˣ��ֻ��ع���ڶ��˶�ѧϰ�ֲ�ƫ��ֲ��ĳͷ��ǵ��ε�KLɢ�ȣ��ԱȽ��ʷֲ�֮��ԣ��ǱȽϵ��ǵķֲ��׼��̬�ֲ��ԣ��ǾͿ��ѵ��

def train_vae(X_train, learning_rate=1e-3, num_epochs=10, batch_size=32, kld_weight=3):
    """Train a ``VAE`` on ``X_train`` and return the trained model.

    The per-batch loss is the summed squared reconstruction error plus
    ``kld_weight`` times the KL divergence between the learned latent
    distribution and a standard normal.

    Relies on a module-level ``device`` being defined.

    Args:
        X_train: NumPy array of training samples (converted with
            ``torch.from_numpy``).
            NOTE(review): presumably float32 rows of 784 values to match the
            inherited encoder's first layer; confirm against the caller.
        learning_rate: Adam learning rate.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size.
        kld_weight: Multiplier applied to the KL term. Defaults to 3.

    Returns:
        The trained ``VAE`` instance.
    """
    # The whole dataset is moved to the device once, so batches need no further transfer
    X_train = torch.from_numpy(X_train).to(device)

    model = VAE()
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Summed (not averaged) reconstruction error, on the same scale as the summed KL term
    criterion = nn.MSELoss(reduction="sum")

    train_loader = torch.utils.data.DataLoader(
        X_train, batch_size=batch_size, shuffle=True
    )

    for epoch in range(num_epochs):
        total_loss = 0.0
        for data in train_loader:
            _, decoded, mu, log_var = model(data)

            KLD = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
            loss = criterion(decoded, data) + kld_weight * KLD

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # `loss` is already a sum over the batch; scaling it by the batch
            # size again would inflate the reported epoch loss
            total_loss += loss.item()

        # Average loss per training sample
        epoch_loss = total_loss / len(train_loader.dataset)
        print(
            "Epoch {}/{}: loss={:.4f}".format(epoch + 1, num_epochs, epoch_loss)
        )

    return model

��濴��ɵ�ͼ��

ͼ��Ǻ�ģ��Ϊ��ʹ��MAE��ؽ��ƣ�ʹ��ʧ��

��һ��ǳ��Ȥ��һ��Ȼ��𽥸ı��һ��ά�ȣ�ͬʱ��ά�ȹ̶��ǾͿ��Կ��α仯�ġ��һЩ��:

ͨ��ı��һ��Ǵ�0�ƶ��9��Ȼ��ƶ��1��7��

生成具有特定标签的图像 CVAE

Ϊ��ɾ��ض��ǩ��ͼ�񣬱��Ҫѧϰ��ڸ��ʾʱ��Ǳ�ڱ��£��һЩ��Ϣ��Ʊ��ô��?һ��Ե��뷨�ǽ�һ��ֱ�ǩ��ݸ��Ϳ��ѧϰ��̡��:

��Ϊ��һ��ϢԴ��Ϊʲô��ͶӰ��?��ͶӰ��ָ��ƥ��ߴ�ͱ�ǩ��Ϣ��Ҫ��ͶӰ��Ǳ�ڿռ��ͬ��ά�ȣ�Ȼ��Ǽ��Ҳ��ȡƽ��ֵ��˷��ֻ�ǰ��Щ��κ��Ƶķ��ԣ��ֻ�Ǽ򵥵��ӡ��ͼ��ǲ��Ǻ��ɢģ��е��ˣ�cvae��2016�귢��ģ��

Ȼ��ʱ��Ҫ��ľ��Ǵ��һ��Ҫ��ɵ��ֵı�ǩ��ͱ�Ϊ��:

class ConditionalVAE(VAE):
    """VAE whose decoder is conditioned on a class label.

    The label is projected to the latent width by a small linear+ReLU layer
    and added to the latent sample before decoding.

    Args:
        num_classes: Size of the label vector fed to the projector, e.g. the
            width of a one-hot encoding.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Projects the label vector into the latent space
        self.label_projector = nn.Sequential(
            nn.Linear(num_classes, self.num_hidden),
            nn.ReLU(),
        )

    def condition_on_label(self, z, y):
        """Return ``z`` shifted by the projected label ``y`` (cast to float)."""
        projected_label = self.label_projector(y.float())
        return z + projected_label

    def forward(self, x, y):
        """Return ``(encoded, decoded, mu, log_var)`` for inputs ``x`` and labels ``y``."""
        encoded = self.encoder(x)
        mu = self.mu(encoded)
        log_var = self.log_var(encoded)
        z = self.reparameterize(mu, log_var)
        # Only the decoder sees the label; the encoder path is unchanged
        decoded = self.decoder(self.condition_on_label(z, y))
        return encoded, decoded, mu, log_var

    def sample(self, num_samples, y):
        """Decode ``num_samples`` standard-normal latents conditioned on ``y``.

        Both the noise and the labels are placed on the device holding the
        model's parameters, so the method does not depend on any module-level
        ``device`` variable.
        """
        model_device = self.mu.weight.device
        with torch.no_grad():
            z = torch.randn(num_samples, self.num_hidden).to(model_device)
            samples = self.decoder(self.condition_on_label(z, y.to(model_device)))
        return samples

��һ��label_projector��²㣬��ͶӰ��Ǳ�ڿռ��ǰ�򴫵ݺͲ��ж�ͨ��ò㡣

CVAE��ʧ��VAE��ʧ��ѵ��Ҳ��һ��ֻ��ˣ�

# Generate ten images, every one conditioned on the digit 8
num_samples = 10
random_labels = [8] * num_samples

# One-hot encode the requested labels and place them on the target device
label_tensor = one_hot(torch.LongTensor(random_labels), num_classes=10).to(device)
generated = cvae.sample(num_samples, label_tensor)

# Convert to a NumPy array on the CPU for plotting
show_images(generated.cpu().detach().numpy(), labels=random_labels)

��Կ��ǵ�ͼ��̶��ˣ��

总结

�Ա��޼ලѧϰ��ѹ��Ļ��Ȼ�򵥵��Զ��ؽ�ͼ�񣬵��ݡ��Ա��(VAEs)�ṩ��һ�ָ��ķ��ͨ��ѧϰ�ɲ��Ǳ�ڿռ�ֲ��Ĳ��ݡ��ز��ʹ��΢��CVAE��Ϊ��ṩ��֧�֣��ѧϰ��Щ��ȶ��ɢģ��ṩ�ܺõ��ۻ��

作者:Konstantin Sofeikov

��: 2023-04-282023-04-28 09:39:09
ԭ��https://kuaibao.qq.com/s/20230428A01R9A00?refer=cp_1026
��Ѷ��Ѷ�ƿ��Ѷ��ݿ��ƽ̨�ʺţ��ţ��֮һ��Ѷ��ݿ��ƽ̨��Э�顷ת�ط��ݡ�
��Ȩ��ϵ cloudcommunity@tencent.com ɾ��

��Ѷ

ɨ��

��վ�� Ⱥ

��ȡר�� 10Ԫ��ż�ȯ

˽�� ��ɻ�

��㿪ʼʵ��VAE��CVAE

��Ѷ

ɨ��

��

�

��Դ

��

��Ѷ�ƿ��

��Ų�Ʒ

��Ƽ�

��Ƽ�

���㿪ʼʵ��VAE��CVAE

�����Ѷ

����

�

��Դ

����

��Ѷ�ƿ�����

���Ų�Ʒ

�����Ƽ�

�����Ƽ�

��㿪ʼʵ��VAE��CVAE

��Ѷ

��

��

��Ѷ�ƿ��

��Ų�Ʒ

��Ƽ�

��Ƽ�