tastynoob
Articles58
Tags18
Categories7
神经网络风格迁移

神经网络风格迁移

高中时玩人工智能发现的很有意思的东西

代码基于 TensorFlow 官方文档中的神经风格迁移教程,直接上代码!
运行要求:需要使用 VGG19 预训练模型;
TensorFlow 版本需为 2.0 及以上。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm import trange



# Optimisation hyper-parameters.
steps = 100                    # number of train_step() calls in the main loop
style_weight = 1e-2            # multiplier applied to the style term of the loss
content_weight = 1e4           # multiplier applied to the content term of the loss
total_variation_weight = 1e8   # multiplier applied to the total-variation term

# Global matplotlib defaults: figure size (13, 10) and no grid on the axes.
mpl.rcParams.update({
    'figure.figsize': (13, 10),
    'axes.grid': False,
})

# Local image files: content_path is the photograph whose content is kept,
# style_path is the picture whose style is transferred onto it.
content_path = 'content.jpg'
style_path = 'style.jpg'

def load_img(path_to_img, max_dim=600):
    """Load a JPEG file and prepare it as a batched float image.

    Args:
        path_to_img: Path of the JPEG file to read.
        max_dim: Target length, in pixels, of the longer image side after
            resizing. Defaults to 600.

    Returns:
        A float32 tensor with a leading batch axis of size 1, i.e.
        (1, height, width, 3), resized so that its longer side is
        ``max_dim`` (up to integer truncation) with the aspect ratio kept.
    """
    # Read the raw bytes of the file.
    raw = tf.io.read_file(path_to_img)
    # Force three colour channels so grayscale/CMYK JPEGs still produce the
    # RGB layout the VGG19 feature extractor is fed with.
    img = tf.image.decode_jpeg(raw, channels=3)
    # Convert the integer pixels to float32 (convert_image_dtype rescales
    # them into the 0-1 range).
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Drop the channel axis: what is left is the spatial size, as floats.
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    # Longer of the two spatial sides; reduce_max also works when traced
    # into a graph, unlike Python's builtin max() over a tensor.
    long_dim = tf.reduce_max(shape)
    # Scale both sides by the same factor so the longer one hits max_dim.
    scale = max_dim / long_dim
    new_shape = tf.cast(shape * scale, tf.int32)
    img = tf.image.resize(img, new_shape)
    # Prepend a batch axis so the image is a batch of one.
    return img[tf.newaxis, :]

# Load both input pictures (content first, then style) through load_img.
content_image, style_image = map(load_img, (content_path, style_path))

############################################################
# Name of the VGG19 layer whose output is used as the content representation.
content_layers = ['block5_conv2']

# Names of the VGG19 layers whose outputs are used as the style
# representation: the first convolution of each of the five blocks.
style_layers = ['block%d_conv1' % block for block in range(1, 6)]

# How many layers contribute to each part of the loss.
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)

def vgg_layers(layer_names):
    """Build a model that maps an image to selected VGG19 layer outputs.

    Args:
        layer_names: Names of the VGG19 layers whose outputs are wanted.

    Returns:
        A ``tf.keras.Model`` taking the VGG19 input and returning the list
        of outputs of the named layers, in the order given.
    """
    # VGG19 with ImageNet weights and without the classification head.
    backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    # The network is only used as a fixed feature extractor.
    backbone.trainable = False

    # Collect the symbolic output of every requested layer.
    selected = []
    for layer_name in layer_names:
        selected.append(backbone.get_layer(layer_name).output)

    return tf.keras.Model([backbone.input], selected)

def gram_matrix(input_tensor):
    """Return the per-batch Gram matrix of a 4-D feature tensor.

    For every batch element, axes 1 and 2 are summed over while taking the
    product of every pair of entries along the last axis; the result is then
    divided by the number of positions (size of axis 1 times size of axis 2).

    Args:
        input_tensor: Feature tensor indexed as (b, i, j, c) by the einsum.

    Returns:
        A tensor indexed as (b, c, d).
    """
    dims = tf.shape(input_tensor)
    positions = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum(
        'bijc,bijd->bcd', input_tensor, input_tensor)
    return channel_products / positions

class StyleContentModel(tf.keras.models.Model):
    """Keras model that extracts style and content features from an image.

    Style features are the Gram matrices of the style layers' outputs;
    content features are the raw outputs of the content layers.
    """

    def __init__(self, style_layers, content_layers):
        """Create the extractor.

        Args:
            style_layers: Names of the VGG19 layers used for style.
            content_layers: Names of the VGG19 layers used for content.
        """
        super().__init__()
        # One VGG-based model returning the style outputs first, followed
        # by the content outputs.
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        # Keep the VGG parameters frozen.
        self.vgg.trainable = False

    def call(self, input):
        """Compute the style and content features of ``input``.

        Args:
            input: Image batch; assumed to hold floats in the 0-1 range,
                since it is multiplied by 255 before VGG preprocessing.

        Returns:
            ``{'content': {layer_name: output}, 'style': {layer_name: gram}}``.
        """
        # Rescale to 0-255 and apply the VGG19 preprocessing.
        scaled = input * 255.0
        prepared = tf.keras.applications.vgg19.preprocess_input(scaled)
        features = self.vgg(prepared)

        # The leading entries belong to the style layers, the rest to the
        # content layers.
        split_at = self.num_style_layers
        raw_style = features[:split_at]
        raw_content = features[split_at:]

        # Style is represented by Gram matrices, not raw activations.
        grams = [gram_matrix(activation) for activation in raw_style]

        # Key every output by the name of the layer that produced it.
        content_dict = dict(zip(self.content_layers, raw_content))
        style_dict = dict(zip(self.style_layers, grams))
        return {'content': content_dict, 'style': style_dict}

# Feature extractor built from the custom model.
extractor = StyleContentModel(
    style_layers=style_layers,
    content_layers=content_layers,
)

# Style target: the style features of the style picture, which the generated
# image should approach.
style_targets = extractor(style_image)['style']
# Content target: the content features of the content picture, which the
# generated image should approach.
content_targets = extractor(content_image)['content']

# The image being optimised, initialised from the content picture.
image = tf.Variable(initial_value=content_image)

def clip_0_1(image):
    """Return ``image`` with every value limited to the closed range 0.0-1.0."""
    lower, upper = 0.0, 1.0
    return tf.clip_by_value(image, lower, upper)

# Adam optimiser used to update the generated image.
opt = tf.optimizers.Adam(
    learning_rate=0.02,
    beta_1=0.99,
    epsilon=1e-1,
)
# The loss weights (style_weight, content_weight, total_variation_weight)
# are module-level settings read by the loss code below.


def style_content_loss(outputs):
    """Combine the style and content losses of one extractor output.

    Each part is the sum, over its layers, of the mean squared difference
    between the output and the corresponding target, multiplied by the
    part's weight divided by its number of layers.

    Args:
        outputs: Dict with 'style' and 'content' entries, each mapping a
            layer name to a tensor (the shape returned by the extractor).

    Returns:
        The weighted style loss plus the weighted content loss.
    """
    produced_style = outputs['style']
    produced_content = outputs['content']

    # Mean squared difference to the style targets, one term per layer.
    style_terms = [
        tf.reduce_mean((produced_style[layer] - style_targets[layer]) ** 2)
        for layer in produced_style
    ]
    # Spread the style weight evenly across the style layers.
    style_loss = tf.add_n(style_terms) * (style_weight / num_style_layers)

    # Same computation against the content targets.
    content_terms = [
        tf.reduce_mean((produced_content[layer] - content_targets[layer]) ** 2)
        for layer in produced_content
    ]
    content_loss = tf.add_n(content_terms) * (content_weight / num_content_layers)

    return style_loss + content_loss



###################################################

def high_pass_x_y(image):
    """Return the neighbour differences of a 4-D array along axes 2 and 1.

    Args:
        image: 4-D array-like supporting slicing and subtraction.

    Returns:
        ``(x_var, y_var)``: differences between adjacent entries along
        axis 2 and along axis 1 respectively; each is one entry shorter
        than ``image`` on the differenced axis.
    """
    right, left = image[:, :, 1:, :], image[:, :, :-1, :]
    lower, upper = image[:, 1:, :, :], image[:, :-1, :, :]
    return right - left, lower - upper

def total_variation_loss(image):
    """Return the total-variation loss of ``image``.

    This is the mean of the squared neighbour differences along axis 2 plus
    the mean of the squared neighbour differences along axis 1.
    """
    horizontal, vertical = high_pass_x_y(image)
    horizontal_energy = tf.reduce_mean(horizontal ** 2)
    vertical_energy = tf.reduce_mean(vertical ** 2)
    return horizontal_energy + vertical_energy

@tf.function()
def train_step(image):
    """Run one optimisation step on ``image`` in place.

    The loss is the style/content loss of the extracted features plus the
    weighted total-variation loss. After the optimiser update the variable
    is clipped back into the 0-1 range.

    Args:
        image: The ``tf.Variable`` holding the picture being optimised.
    """
    with tf.GradientTape() as recorder:
        features = extractor(image)
        feature_term = style_content_loss(features)
        smoothness_term = total_variation_weight * total_variation_loss(image)
        total = feature_term + smoothness_term

    # Gradient of the total loss with respect to the image itself.
    gradient = recorder.gradient(total, image)
    opt.apply_gradients([(gradient, image)])
    # Keep the pixel values inside the valid range after the update.
    image.assign(clip_0_1(image))


# Start the optimisation from a fresh copy of the content picture.
image = tf.Variable(content_image)

# Run the configured number of optimisation steps with a progress bar.
for _ in trange(steps):
    train_step(image)

# Draw the first (only) image of the batch, then open the figure window.
# Without plt.show() a plain script run would exit without displaying it.
plt.imshow(image.read_value()[0])
plt.show()
Author:tastynoob
Link:https://tastynoob.github.io/1970/01/01/%E5%9B%BE%E5%83%8F%E9%A3%8E%E6%A0%BC%E8%BF%81%E7%A7%BB/
版权声明:本文采用 CC BY-NC-SA 3.0 CN 协议进行许可
×