|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +''' |
| 3 | +@time: 2019/01/11 11:28 |
| 4 | +spytensor |
| 5 | +''' |
| 6 | + |
| 7 | +import os |
| 8 | +import json |
| 9 | +import numpy as np |
| 10 | +import pandas as pd |
| 11 | +import glob |
| 12 | +import cv2 |
| 13 | +import os |
| 14 | +import shutil |
| 15 | +from IPython import embed |
| 16 | +from sklearn.model_selection import train_test_split |
| 17 | +np.random.seed(41) |
| 18 | + |
| 19 | +# Category id 0 is the background, so the real classes below start at 1. |
| 20 | +classname_to_id = {"person": 1} |
| 21 | + |
class Csv2CoCo:
    """Convert per-image CSV box annotations into a COCO-style dict.

    ``total_annos`` maps an image file name to an iterable of rows laid out
    as ``[x1, y1, x2, y2, label]`` (top-left corner, bottom-right corner,
    class name).
    """

    def __init__(self, image_dir, total_annos, class_map=None):
        """Store the inputs and reset the running id counters.

        Args:
            image_dir: Directory that contains the image files.
            total_annos: Mapping of image file name to its annotation rows.
            class_map: Optional mapping of class name to category id. When
                omitted, the module-level ``classname_to_id`` table is used.
        """
        self.images = []
        self.annotations = []
        self.categories = []
        # NOTE(review): ids are 0-based; some COCO tooling may expect ids
        # that start at 1 -- confirm before relying on id 0.
        self.img_id = 0
        self.ann_id = 0
        self.image_dir = image_dir
        self.total_annos = total_annos
        self.class_map = classname_to_id if class_map is None else class_map

    def save_coco_json(self, instance, save_path):
        """Write ``instance`` to ``save_path`` as indented UTF-8 JSON.

        Args:
            instance: JSON-serialisable object, normally the dict returned
                by ``to_coco``.
            save_path: Destination file; an existing file is overwritten.
        """
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # is pinned; the context manager guarantees the handle is closed.
        with open(save_path, 'w', encoding='utf-8') as fp:
            json.dump(instance, fp, ensure_ascii=False, indent=2)

    def to_coco(self, keys):
        """Build the COCO dict for the images named in ``keys``.

        Images and annotations are appended to the lists held by this
        instance and the id counters keep running, so repeated calls
        accumulate.

        Args:
            keys: Iterable of image file names present in ``total_annos``.

        Returns:
            Dict with the keys ``info``, ``license``, ``images``,
            ``annotations`` and ``categories``.
        """
        self._init_categories()
        for key in keys:
            self.images.append(self._image(key))
            for shape in self.total_annos[key]:
                # Every column but the last is a coordinate; the last one is
                # the class name.
                box = [int(cor) for cor in shape[:-1]]
                self.annotations.append(self._annotation(box, shape[-1]))
                self.ann_id += 1
            self.img_id += 1
        # NOTE(review): the COCO format spells this key 'licenses'; it is
        # kept as 'license' so existing consumers see unchanged output.
        return {
            'info': 'spytensor created',
            'license': ['license'],
            'images': self.images,
            'annotations': self.annotations,
            'categories': self.categories,
        }

    def _init_categories(self):
        """Rebuild ``self.categories`` from the class map.

        The list is replaced rather than extended so that calling
        ``to_coco`` more than once does not duplicate categories.
        """
        self.categories = [
            {'id': class_id, 'name': class_name}
            for class_name, class_id in self.class_map.items()
        ]

    def _image(self, path):
        """Build the COCO ``images`` entry for one file.

        Args:
            path: Image file name relative to ``image_dir``.

        Returns:
            Dict with ``height``, ``width``, ``id`` and ``file_name``.

        Raises:
            OSError: If OpenCV cannot read the image.
        """
        print(path)
        full_path = os.path.join(self.image_dir, path)
        img = cv2.imread(full_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError(
                "cannot read image (missing or undecodable): %s" % full_path
            )
        return {
            'height': img.shape[0],
            'width': img.shape[1],
            'id': self.img_id,
            'file_name': path,
        }

    def _annotation(self, shape, label):
        """Build the COCO ``annotations`` entry for one box.

        Args:
            shape: Sequence whose first four items are ``x1, y1, x2, y2``.
            label: Class name; must be a key of the class map.

        Returns:
            Dict with ``id``, ``image_id``, ``category_id``,
            ``segmentation``, ``bbox``, ``iscrowd`` and ``area``.

        Raises:
            KeyError: If ``label`` is not in the class map.
        """
        points = shape[:4]
        bbox = self._get_box(points)
        return {
            'id': self.ann_id,
            'image_id': self.img_id,
            'category_id': int(self.class_map[label]),
            'segmentation': self._get_seg(points),
            'bbox': bbox,
            'iscrowd': 0,
            # Area of the box (width * height) instead of a constant 1.0.
            'area': float(bbox[2] * bbox[3]),
        }

    def _get_box(self, points):
        """Convert ``[x1, y1, x2, y2]`` to COCO's ``[x, y, width, height]``."""
        min_x, min_y, max_x, max_y = points[:4]
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    def _get_seg(self, points):
        """Return the box outline as a single eight-point polygon.

        The polygon visits the four corners and the four edge midpoints,
        starting at ``(x1, y1)``, as a flat ``[x, y, x, y, ...]`` list wrapped
        in an outer list.
        """
        min_x, min_y, max_x, max_y = points[:4]
        h = max_y - min_y
        w = max_x - min_x
        return [[
            min_x, min_y,
            min_x, min_y + 0.5 * h,
            min_x, max_y,
            min_x + 0.5 * w, max_y,
            max_x, max_y,
            max_x, max_y - 0.5 * h,
            max_x, min_y,
            max_x - 0.5 * w, min_y,
        ]]
| 110 | + |
| 111 | + |
if __name__ == '__main__':
    csv_file = "train.csv"
    image_dir = "images/"
    saved_coco_path = "./"

    # Group the CSV rows by image file name: {file name: [row, ...]}.
    # Each stored row keeps every column after the image path. Appending to
    # a list avoids re-copying the accumulated rows for every CSV line.
    total_csv_annotations = {}
    annotations = pd.read_csv(csv_file, header=None).values
    for annotation in annotations:
        # basename also copes with '/' separators on Windows.
        key = os.path.basename(annotation[0])
        total_csv_annotations.setdefault(key, []).append(annotation[1:])

    # Split by image so that all boxes of one image stay in the same subset.
    total_keys = list(total_csv_annotations.keys())
    train_keys, val_keys = train_test_split(total_keys, test_size=0.2)
    print("train_n:", len(train_keys), 'val_n:', len(val_keys))

    # Create the output layout: coco/annotations and coco/images/<split>.
    coco_root = os.path.join(saved_coco_path, 'coco')
    annotation_dir = os.path.join(coco_root, 'annotations')
    os.makedirs(annotation_dir, exist_ok=True)

    # Convert each subset to a COCO json file and copy its images alongside.
    for split_name, split_keys in (('train2017', train_keys),
                                   ('val2017', val_keys)):
        split_image_dir = os.path.join(coco_root, 'images', split_name)
        os.makedirs(split_image_dir, exist_ok=True)

        # A fresh converter per subset restarts the image/annotation ids.
        converter = Csv2CoCo(image_dir=image_dir,
                             total_annos=total_csv_annotations)
        instance = converter.to_coco(split_keys)
        converter.save_coco_json(
            instance,
            os.path.join(annotation_dir, 'instances_%s.json' % split_name),
        )

        for file_name in split_keys:
            shutil.copy(os.path.join(image_dir, file_name), split_image_dir)
| 149 | + |
0 commit comments