Tensorflow2——图像定位

Albert_xiong

发布于 2021-06-21 17:57:51

7940

发布于 2021-06-21 17:57:51

文章被收录于专栏：Mybatis学习Mybatis学习

图像定位

给定一幅图片，我们要输出四个数字（x, y, w, h）：其中 (x, y) 是物体边框上某一个点的坐标，w 和 h 分别是边框的宽度和高度。有了这四个数字，我们可以很容易地找到物体的边框。

1、单张图片图像定位

import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from lxml import etree
import glob
from matplotlib.patches import Rectangle

# Read the raw bytes of the sample JPEG, decode them into an image tensor,
# and preview the result.
img = tf.image.decode_jpeg(tf.io.read_file("./location/images/Abyssinian_1.jpg"))
plt.imshow(img)

# Read the xml annotation that belongs to the sample image.
# The context manager closes the file handle as soon as the text is read.
with open("./location/annotations/xmls/Abyssinian_1.xml") as xml_file:
    xml = xml_file.read()

# Parse the markup and build a selector that XPath queries can run against.
sel = etree.HTML(xml)

# Each query (e.g. the text of <width> inside <size>) returns a list of text
# nodes, so take the first entry and convert it to an int.
width = int(sel.xpath("//size/width/text()")[0])
height = int(sel.xpath("//size/height/text()")[0])
xmin = int(sel.xpath("//bndbox/xmin/text()")[0])
xmax = int(sel.xpath("//bndbox/xmax/text()")[0])
ymin = int(sel.xpath("//bndbox/ymin/text()")[0])
ymax = int(sel.xpath("//bndbox/ymax/text()")[0])
# For this sample (width, height, xmin, xmax, ymin, ymax) is
# (600, 400, 333, 425, 72, 158).

# Show the image and overlay the annotated bounding box as a red outline.
plt.imshow(img)
# The rectangle is anchored at (xmin, ymin) and spans the box width/height.
rec = Rectangle(
    xy=(xmin, ymin),
    width=xmax - xmin,
    height=ymax - ymin,
    fill=False,
    color="red",
)
ax = plt.gca()  # axes the image was just drawn on
ax.add_patch(rec)

2、随意尺度图片定位

（代码紧接上）

# Resize the image to 224x224 and scale pixel values by 1/255, then preview it.
img = tf.image.resize(img, (224, 224)) / 255
plt.imshow(img)

# Map the box corners from original pixel coordinates to the 224x224 frame:
# normalise by the original image size first, then multiply by the new size.
xmin, xmax = (xmin / width) * 224, (xmax / width) * 224
ymin, ymax = (ymin / height) * 224, (ymax / height) * 224

# Redraw the resized image with the rescaled box on top.
plt.imshow(img)
# The rectangle is anchored at (xmin, ymin) and spans the box width/height.
rec = Rectangle(
    xy=(xmin, ymin),
    width=xmax - xmin,
    height=ymax - ymin,
    fill=False,
    color="red",
)
ax = plt.gca()  # axes the image was just drawn on
ax.add_patch(rec)

3、批量图片定位

创建输入管道：数据读取与预处理，首先获取图像的路径

import os

# Every image file in the dataset.
images = glob.glob("./location/images/*.jpg")
# Annotation files that hold the regression targets.
xmls = glob.glob("./location/annotations/xmls/*.xml")
# Base names (no directory, no extension) of the annotated samples; the
# original note says 3686 images have a matching xml file.
# os.path understands the platform's path separator, whereas splitting on
# "\\" only yields the file name when paths use Windows separators.
name = [os.path.splitext(os.path.basename(x))[0] for x in xmls]

如何从 images 中筛选出文件名出现在这些名称（name）里的图片呢？

import os

# Keep only the images whose base name (no directory, no extension) also
# appears among the annotation names.
# A set makes each membership test O(1), and os.path extracts the file name
# on any platform instead of relying on "\\" separators.
name_set = set(name)
imgs_train = [
    img_path
    for img_path in images
    if os.path.splitext(os.path.basename(img_path))[0] in name_set
]

数据集划分

# Hold out 20% of the annotated images (fraction truncated by int()) for
# testing; everything else is used for training.
total_count = len(imgs_train)
test_count = int(total_count * 0.2)
train_count = total_count - test_count

def to_labels(path):
    """Return the normalised bounding box stored in an xml annotation file.

    Args:
        path: Location of the xml annotation file.

    Returns:
        A list ``[xmin/width, ymin/height, xmax/width, ymax/height]`` where
        the box corners and the image size are all read from the file.

    Raises:
        IndexError: If one of the queried elements is missing from the file.
        ValueError: If an element's text cannot be converted to ``int``.
    """
    # Close the file handle deterministically instead of leaking it.
    with open(path) as xml_file:
        xml = xml_file.read()
    sel = etree.HTML(xml)

    def _first_int(query):
        # Each XPath query yields a list of text nodes; use the first one.
        return int(sel.xpath(query)[0])

    width = _first_int("//size/width/text()")
    height = _first_int("//size/height/text()")
    xmin = _first_int("//bndbox/xmin/text()")
    xmax = _first_int("//bndbox/xmax/text()")
    ymin = _first_int("//bndbox/ymin/text()")
    ymax = _first_int("//bndbox/ymax/text()")
    return [xmin / width, ymin / height, xmax / width, ymax / height]

import os

# Build one label per training image, in the same order as imgs_train.
# glob returns paths in arbitrary order, so iterating over xmls directly
# could pair an image with another image's box; looking the annotation up
# by base name keeps every image aligned with its own label.
xml_by_name = {os.path.splitext(os.path.basename(p))[0]: p for p in xmls}
labels = [
    to_labels(xml_by_name[os.path.splitext(os.path.basename(img_path))[0]])
    for img_path in imgs_train
]  # each label is [xmin, ymin, xmax, ymax], normalised by the image size

# Regroup the labels column-wise: all xmin values together, then all ymin,
# all xmax and all ymax, each as its own NumPy array.
out_1, out_2, out_3, out_4 = (np.array(column) for column in zip(*labels))

标签数据集

# Slice the four coordinate arrays along their first axis so that every
# dataset element is one (out_1, out_2, out_3, out_4) tuple, i.e. one box.
label_datasets=tf.data.Dataset.from_tensor_slices((out_1,out_2,out_3,out_4))

载入图片

def load_image(path):
    """Load a JPEG file as a 224x224, 3-channel tensor scaled by 1/255.

    Args:
        path: Path of the JPEG file to read.

    Returns:
        The decoded image, resized to 224x224 and divided by 255.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return resized / 255

图片数据集处理

# Turn the list of image paths into a dataset and decode/resize each path
# into an image tensor with load_image.
image_dataset = tf.data.Dataset.from_tensor_slices(imgs_train).map(load_image)

图片数据集与标签数据集整合

# Pair every image with its label tuple. zip matches elements purely by
# position, so image_dataset and label_datasets must be in the same order.
dataset=tf.data.Dataset.zip((image_dataset,label_datasets))

划分数据集

# Batch size and shuffle-buffer size used by the input pipeline.
BATCH_SIZE = 8
BUFFER_SIZE = 300

# The first test_count elements form the test set; the rest are for training.
dataset_test = dataset.take(test_count)
dataset_train = dataset.skip(test_count)

# Number of whole batches per training epoch / validation pass.
STEPS_PER_EPOCH = train_count // BATCH_SIZE
VALIDATION_STEPS = test_count // BATCH_SIZE

训练数据集与测试数据集的处理

# Training pipeline: shuffle, batch, repeat indefinitely, then prefetch so
# that upcoming batches are prepared ahead of consumption.
dataset_train = dataset_train.shuffle(BUFFER_SIZE)
dataset_train = dataset_train.batch(BATCH_SIZE)
dataset_train = dataset_train.repeat()
dataset_train = dataset_train.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Test pipeline: batching only.
dataset_test = dataset_test.batch(BATCH_SIZE)

图像定位

# take(1) yields a single batch, so img and label are batched tensors here.
for img, label in dataset_train.take(1):
    out_1, out_2, out_3, out_4 = label
    # Display the first image of the batch.
    first_image = tf.keras.preprocessing.image.array_to_img(img[0])
    plt.imshow(first_image)
    # Scale the first sample's normalised corners back to 224x224 pixels.
    xmin, ymin, xmax, ymax = (
        coord[0].numpy() * 224 for coord in (out_1, out_2, out_3, out_4)
    )
    # The rectangle is anchored at (xmin, ymin) and spans the box size.
    rec = Rectangle(
        xy=(xmin, ymin),
        width=xmax - xmin,
        height=ymax - ymin,
        fill=False,
        color="red",
    )
    ax = plt.gca()  # axes the image was just drawn on
    ax.add_patch(rec)

创建模型：创建图像定位的模型，使用预训练网络

# Xception backbone with ImageNet weights and no classification top.
xception = tf.keras.applications.Xception(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

# Assemble the network with the functional API.
inputs = tf.keras.layers.Input(shape=(224, 224, 3))
x = xception(inputs)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
for units in (2048, 256):
    x = tf.keras.layers.Dense(units, activation="relu")(x)

# Four single-unit heads, one per box coordinate. This is regression, so no
# activation function is applied.
prediction = [tf.keras.layers.Dense(1)(x) for _ in range(4)]
out_1, out_2, out_3, out_4 = prediction

model = tf.keras.models.Model(inputs=inputs, outputs=prediction)

编译模型

# Mean squared error is the loss for every output head; mean absolute error
# is tracked as a metric.
# `learning_rate` is the supported keyword: the old `lr` alias is deprecated
# and is no longer accepted by current Keras optimizers.
model.compile(
    tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss="mse",
    metrics=["mae"],
)

训练模型

# Number of passes over the training data.
Epochs = 50

# The training dataset repeats indefinitely, so steps_per_epoch defines where
# each epoch ends; validation runs for validation_steps batches.
history = model.fit(
    dataset_train,
    epochs=Epochs,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=dataset_test,
    validation_steps=VALIDATION_STEPS,
)

…

可视化

# Per-epoch loss values recorded during training.
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(Epochs)

# Training loss as a red line, validation loss as blue dots.
plt.figure()
for series, fmt, caption in (
    (loss, "r", "Training loss"),
    (val_loss, "bo", "Validation loss"),
):
    plt.plot(epochs, series, fmt, label=caption)
plt.title("Training and validation Loss")
plt.xlabel("Epoch")
plt.ylim([0, 1])  # fixed y-range for the loss axis
plt.legend()
plt.show()

模型保存

# Write the trained model to an HDF5 file in the working directory.
model.save("detect_v1.h5")

新模型载入训练好的权重

# Load the model stored in detect_v1.h5 into a new model object.
new_model=tf.keras.models.load_model("detect_v1.h5")

新模型预测

plt.figure(figsize=(8, 24))
for img, _ in dataset_test.take(1):
    # One array per output head, each holding one prediction per image.
    out_1, out_2, out_3, out_4 = new_model.predict(img)
    # Show at most three images; the batch may contain fewer than three.
    num_shown = min(3, int(img.shape[0]))
    for i in range(num_shown):
        plt.subplot(3, 1, i + 1)  # i-th cell of a 3-row, 1-column grid
        plt.imshow(tf.keras.preprocessing.image.array_to_img(img[i]))
        # Flatten each head's output and take the i-th value as a plain
        # float, so the patch receives scalar coordinates rather than
        # one-element arrays; then scale to 224x224 pixel coordinates.
        xmin, ymin, xmax, ymax = (
            float(np.ravel(out)[i]) * 224
            for out in (out_1, out_2, out_3, out_4)
        )
        rect = Rectangle(
            (xmin, ymin),
            (xmax - xmin),
            (ymax - ymin),
            fill=False,
            color="red",
        )
        ax = plt.gca()
        ax.add_patch(rect)

效果还可以，嘻嘻

一级目录

本文参与 腾讯云自媒体分享计划，分享自作者个人站点/博客。

原始发表：2020-06-02 ，如有侵权请联系 cloudcommunity@tencent.com 删除

xml

本文分享自作者个人站点/博客，前往查看

如有侵权，请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划，欢迎热爱写作的你一起参与！

xml

登录后参与评论

0 条评论

热度

Tensorflow2——图像定位

Tensorflow2——图像定位

图像定位

1、单张图片图像定位

2、随意尺度图片定位

3、批量图片定位

一级目录

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐