天天看點

HRSID艦船檢測資料集標籤格式轉換:JSON轉為XML

HRSID資料集介紹參考原文:https://ieeexplore.ieee.org/document/9127939

資料集下載連結:https://github.com/chaozhong2010/hrsid

雷達目标檢測學習交流群私信加v

原資料集為COCO格式,標籤資訊全部存在train_test2017.json中;本代碼將其轉為VOC格式的標籤,每個jpg對應一個xml。轉換代碼如下,僅供參考:
import xml.dom
import xml.dom.minidom
import os
# from PIL import Image
import cv2
import json

# Directory holding the source JPEG images.
_IMAGE_PATH = r"F:\SAR_Ship\HRSID\JPEGImages"

# Pretty-printing settings for the generated XML.
_INDENT = 8 * " "
_NEW_LINE = "\n"

# Fixed values written into every VOC annotation.
_FOLDER_NODE = "HRSID"
_ROOT_NODE = "annotation"
_DATABASE_NAME = "Unknown"
_ANNOTATION = "VOC"
_AUTHOR = "zc"
_SEGMENTED = "0"

# Per-object defaults.
_DIFFICULT = "0"
_TRUNCATED = "0"
_POSE = "Unspecified"

# _IMAGE_COPY_PATH= 'JPEGImages'



# Helper that wraps the steps needed to create a text-bearing node.
def createElementNode(doc, tag, attr):
    """Return a new ``tag`` element whose single child is a text node.

    Args:
        doc: DOM document used as the node factory.
        tag: Name of the element to create.
        attr: Text placed inside the element.

    Returns:
        The new element; it is not attached to any parent.
    """
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(attr))
    return node


def createChildNode(doc, tag, attr, parent_node):
    """Create a ``tag`` element containing the text ``attr`` under ``parent_node``.

    The element is built with ``createElementNode`` and appended as the last
    child of ``parent_node``. Nothing is returned.
    """
    parent_node.appendChild(createElementNode(doc, tag, attr))


# The <object> node is special: it carries a nested <bndbox> structure.
def createObjectNode(doc, attrs):
    """Build a VOC ``<object>`` element from one ``[x, y, width, height]`` box.

    Args:
        doc: DOM document used as the node factory.
        attrs: Sequence whose first four items are the box's top-left ``x``,
            top-left ``y``, ``width`` and ``height`` (the layout of the
            ``bbox`` entry the caller passes in).

    Returns:
        The new, unattached ``<object>`` element with ``name``, ``pose``,
        ``truncated``, ``difficult`` and ``bndbox`` children. The class name
        is always ``"ship"``.

    Raises:
        IndexError: If ``attrs`` has fewer than four items.
    """
    object_node = doc.createElement('object')
    print("建立object中")
    midname = "ship"

    createChildNode(doc, 'name', midname, object_node)
    createChildNode(doc, 'pose', _POSE, object_node)
    createChildNode(doc, 'truncated', _TRUNCATED, object_node)
    createChildNode(doc, 'difficult', _DIFFICULT, object_node)

    x, y, box_width, box_height = (float(attrs[i]) for i in range(4))

    # Add position and size BEFORE truncating to int: truncating each term
    # separately (int(x) + int(w)) can place the far corner a pixel short
    # when the box coordinates are fractional.
    corners = (
        ('xmin', int(x)),
        ('ymin', int(y)),
        ('xmax', int(x + box_width)),
        ('ymax', int(y + box_height)),
    )

    bndbox_node = doc.createElement('bndbox')
    for tag, value in corners:
        createChildNode(doc, tag, str(value), bndbox_node)
    object_node.appendChild(bndbox_node)

    return object_node


# Write the document to an XML file.
def writeXMLFile(doc, filename):
    """Pretty-print ``doc`` to ``filename`` without the XML declaration line.

    The document is rendered in memory with an 8-space indent, the first
    line (the ``<?xml ...?>`` declaration) is dropped, and whitespace-only
    lines are skipped.

    Args:
        doc: DOM document to serialize.
        filename: Path of the XML file to create or overwrite.
    """
    # Rendering to a string avoids the shared 'tmp.xml' scratch file in the
    # current directory, which was never cleaned up.
    rendered = doc.toprettyxml(indent=' ' * 8, newl='\n')
    body_lines = rendered.split('\n')[1:]

    # The declaration is removed, so XML readers will assume UTF-8; write
    # UTF-8 explicitly rather than whatever the platform default happens to be.
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.writelines(line + '\n' for line in body_lines if line.strip())


def _groupBboxesByFileName(coco):
    """Map each image file name in a COCO-style dict to its ``bbox`` entries.

    Args:
        coco: Parsed JSON with an ``"images"`` list (items carrying ``"id"``
            and ``"file_name"``) and an ``"annotations"`` list (items carrying
            ``"image_id"`` and ``"bbox"``).

    Returns:
        Dict from file name to the list of ``bbox`` values of its
        annotations, in the order they appear in ``"annotations"``.
    """
    bboxes_by_image_id = {}
    for annotation in coco["annotations"]:
        bboxes_by_image_id.setdefault(annotation["image_id"], []).append(annotation["bbox"])

    bboxes_by_file_name = {}
    for image in coco["images"]:
        bboxes_by_file_name.setdefault(image["file_name"], []).extend(
            bboxes_by_image_id.get(image["id"], []))
    return bboxes_by_file_name


def _buildAnnotationDoc(file_name, width, height, depth, bboxes):
    """Assemble the VOC annotation DOM document for a single image.

    Args:
        file_name: Image file name stored in the ``<filename>`` node.
        width: Image width in pixels.
        height: Image height in pixels.
        depth: Number of image channels.
        bboxes: Iterable of ``[x, y, width, height]`` boxes, one per object.

    Returns:
        The populated DOM document.
    """
    my_dom = xml.dom.getDOMImplementation()
    doc = my_dom.createDocument(None, _ROOT_NODE, None)
    root_node = doc.documentElement

    createChildNode(doc, 'folder', _FOLDER_NODE, root_node)
    createChildNode(doc, 'filename', file_name, root_node)

    source_node = doc.createElement('source')
    createChildNode(doc, 'database', _DATABASE_NAME, source_node)
    root_node.appendChild(source_node)

    size_node = doc.createElement('size')
    createChildNode(doc, 'width', str(width), size_node)
    createChildNode(doc, 'height', str(height), size_node)
    createChildNode(doc, 'depth', str(depth), size_node)
    root_node.appendChild(size_node)

    createChildNode(doc, 'segmented', _SEGMENTED, root_node)

    for bbox in bboxes:
        root_node.appendChild(createObjectNode(doc, bbox))
    return doc


if __name__ == "__main__":
    # Paths of the COCO JSON file, the image folder and the XML output folder.
    json_path = r"F:\SAR_Ship\HRSID\json/train_test2017.json"
    img_path = r"F:\SAR_Ship\HRSID\JPEGImages"
    Annotations_save_path = r'F:\SAR_Ship\HRSID\Annotations'

    fileList = os.listdir(img_path)
    if not fileList:
        # A list never equals 0, so the emptiness check must test truthiness.
        raise SystemExit("No files found in " + img_path)

    os.makedirs(Annotations_save_path, exist_ok=True)

    # Parse the JSON once and index it by file name. Iterating over the
    # actual "images" list (instead of a hard-coded count) also covers the
    # last entry, and the lookup per image becomes O(1).
    with open(json_path, "r", encoding="utf-8") as f:
        bboxes_by_file_name = _groupBboxesByFileName(json.load(f))

    # Generate one XML file for every image.
    for imageName in fileList:
        print(imageName)

        img = cv2.imread(os.path.join(img_path, imageName))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("Skipping unreadable image file:", imageName)
            continue
        height, width, channel = img.shape

        doc = _buildAnnotationDoc(
            imageName, width, height, channel,
            bboxes_by_file_name.get(imageName, []))

        # splitext removes only the extension; str.strip(".jpg") would strip
        # any leading/trailing '.', 'j', 'p' or 'g' characters from the name.
        saveName = os.path.splitext(imageName)[0]
        xml_file_name = os.path.join(Annotations_save_path, saveName + '.xml')
        writeXMLFile(doc, xml_file_name)