# Source code for hello.data.coco2yolo
# https://github.com/ultralytics/JSON2YOLO
import shutil
from pathlib import Path
import numpy as np
from hello import io
from tqdm import tqdm
# Lower-case file suffixes that are treated as images when scanning a dataset.
img_formats = {".bmp", ".jpg", ".jpeg", ".png"}
# [docs]
def coco_to_yolo(coco_dir, json_dir=None, classes=None):
    """Convert a COCO-style dataset into a YOLO-style directory layout.

    A sibling directory named ``<coco_dir>_yolo`` is deleted if present and
    re-created. For every ``*.json`` file in the annotation directory
    (processed in sorted order) the function writes:

    * ``images/<name>``: a copy of every image listed in the json file
      (flattened to its base name);
    * ``<json stem>.txt``: the sorted, de-duplicated ``./images/<name>``
      paths of those images;
    * ``labels/<image stem>.txt``: one ``class cx cy w h`` line per
      non-crowd annotation with a positive width and height, where the box
      is centre-based and divided by the image width/height;
    * ``names.txt``: an appended line ``<json stem>: <class name list>``.

    Args:
        coco_dir: Dataset root; it is searched recursively for image files.
        json_dir: Directory containing the COCO json files, joined onto
            ``coco_dir``. Defaults to ``coco_dir`` itself.
        classes: Optional list of class names to keep. Class index 0 is
            always ``"REMAINDER"``; categories whose name is not in the
            resulting list are mapped to index 0.

    Returns:
        The output directory as a string.

    Raises:
        KeyError: If an image listed in a json file is not found under
            ``coco_dir``, or an annotation references an unknown image or
            category id.
    """
    coco_dir = Path(coco_dir)
    json_dir = coco_dir if json_dir is None else coco_dir / json_dir

    # The output directory is rebuilt from scratch on every run.
    out_dir = coco_dir.parent / (coco_dir.name + "_yolo")
    shutil.rmtree(out_dir, ignore_errors=True)
    (out_dir / "labels").mkdir(parents=True)
    (out_dir / "images").mkdir(parents=True)

    # Index images by base name. The suffix is lower-cased so that files
    # such as "IMG_0001.JPG" are not silently ignored.
    real_path = {
        f.name: str(f)
        for f in coco_dir.glob("**/*")
        if f.suffix.lower() in img_formats
    }
    total_images = len(real_path)

    for json_file in sorted(json_dir.glob("*.json")):
        data = io.load_json(json_file)
        images = {x["id"]: x for x in data["images"]}

        # Index 0 is reserved for "REMAINDER"; the other names are sorted.
        names = classes or [x["name"] for x in data["categories"]]
        names = ["REMAINDER"] + sorted(set(names).difference(["REMAINDER"]))
        name_dict = {s: i for i, s in enumerate(names)}
        cvt_id = {
            x["id"]: name_dict.get(x["name"], 0)
            for x in data["categories"]
        }

        image_path_list = []
        for x in tqdm(data["images"], desc=f"{json_file.stem}"):
            # COCO "file_name" may carry a sub-directory; the index above
            # and the label files below both work on the base name, so the
            # copied image is flattened the same way.
            img_name = Path(x["file_name"]).name
            src_path = real_path[img_name]
            dst_path = out_dir / "images" / img_name
            if not dst_path.exists():
                shutil.copyfile(src_path, dst_path)
            image_path_list.append(f"./images/{img_name}")
        image_path_list = sorted(set(image_path_list))
        n_images = len(image_path_list)

        with open(out_dir / "names.txt", "a") as file:
            file.write("{}: {}\n".format(json_file.stem, names))
        with open(out_dir / (json_file.stem + ".txt"), "w") as file:
            file.write("\n".join(image_path_list))

        # Collect label lines per image first so that each label file is
        # opened once per json file instead of once per annotation.
        label_lines = {}
        for x in tqdm(data["annotations"], desc=f"{json_file.stem} ({n_images}/{total_images})"):
            if x.get("iscrowd"):
                continue
            img = images[x["image_id"]]
            h, w, f = img["height"], img["width"], img["file_name"]
            # format is [top left x, top left y, width, height]
            box = np.array(x["bbox"], dtype=np.float32)
            box[:2] += box[2:] / 2  # to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y
            if (box[2] > 0.) and (box[3] > 0.):  # if w * h > 0
                line = "%g %.6f %.6f %.6f %.6f\n" % (
                    cvt_id[x["category_id"]], *box)
                label_lines.setdefault(Path(f).stem, []).append(line)

        # Append mode keeps labels written for the same image by an
        # earlier json file.
        for stem, lines in label_lines.items():
            with open(out_dir / "labels" / (stem + ".txt"), "a") as file:
                file.writelines(lines)

    return str(out_dir)
# [docs]
def parse_args(args=None):
    """Parse the command-line options of the converter.

    Args:
        args: Optional list of argument strings; when ``None`` argparse
            falls back to ``sys.argv[1:]``.

    Returns:
        A dict with the keys ``coco_dir``, ``json_dir`` and ``classes``.
    """
    import argparse

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument("coco_dir", type=str, help="dataset root dir")
    cli.add_argument("-j", "--json_dir", type=str, help="coco json file dir")
    cli.add_argument(
        "--classes",
        nargs="+",
        type=str,
        help="filter by class: --classes c0 c2 c3",
    )
    namespace = cli.parse_args(args=args)
    return vars(namespace)
# [docs]
def main(args=None):
    """Command-line entry point: parse options, convert, report.

    Prints the parsed options (prefixed with this file's path), runs
    ``coco_to_yolo`` with them, prints the value it returns, and returns 0.
    """
    options = parse_args(args)
    print(f"{__file__}: {options}")
    result = coco_to_yolo(**options)
    print(result)
    return 0