Dataclasses defining the COCO dataset and how to convert it to/from a dict.
%load_ext autoreload
%autoreload 2
Executing <Handle IOLoop.add_future.<locals>.<lambda>(<Future finis...queues.py:248>) at /Users/ysem/miniconda3/envs/cocorepr36/lib/python3.6/site-packages/tornado/ioloop.py:688 created at /Users/ysem/miniconda3/envs/cocorepr36/lib/python3.6/site-packages/tornado/concurrent.py:184> took 0.448 seconds
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

class CocoElement[source]

CocoElement()

CocoElement()

class CocoInfo[source]

CocoInfo(year:Optional[int]=None, version:Optional[str]=None, description:Optional[str]=None, contributor:Optional[str]=None, url:Optional[str]=None, date_created:Optional[str]=None) :: CocoElement

CocoInfo(year:Union[int, NoneType]=None, version:Union[str, NoneType]=None, description:Union[str, NoneType]=None, contributor:Union[str, NoneType]=None, url:Union[str, NoneType]=None, date_created:Union[str, NoneType]=None)

# Round-trip check: build a CocoInfo from a plain dict and export it back.
# 'date_created' is not provided in the input.
coco_info_dict = {
 'year': 2020,
 'version': 'v1',
 'description': 'desc',
 'contributor': 'me',
 'url': 'http://url',
}

coco_info = CocoInfo.from_dict(coco_info_dict)
# Export without the None-valued fields, so the unset 'date_created' is not emitted.
coco_info_dict2 = coco_info.to_dict_skip_nulls()

display(coco_info)
display(coco_info_dict2)
# The skip-nulls export must be equal to the dict the object was built from.
assert coco_info_dict2 == coco_info_dict, coco_info_dict2

# The element reports 'info' as the collection it belongs to.
assert coco_info.collection_name == 'info'
assert coco_info.is_valid()
CocoInfo(year=2020, version='v1', description='desc', contributor='me', url='http://url', date_created=None)
{'year': 2020,
 'version': 'v1',
 'description': 'desc',
 'contributor': 'me',
 'url': 'http://url'}

class CocoLicense[source]

CocoLicense(id:str, name:str, url:Optional[str]=None) :: CocoElement

CocoLicense(id:str, name:str, url:Union[str, NoneType]=None)

# Round-trip check: build a CocoLicense from a plain dict and export it back.
coco_license_dict = {
 'id': '1',
 'name': 'Attribution-NonCommercial-NoDerivatives 4.0 International',
 'url': 'https://creativecommons.org/licenses/by-nc-nd/4.0/',
}

coco_license = CocoLicense.from_dict(coco_license_dict)
coco_license_dict2 = coco_license.to_dict_skip_nulls()
display(coco_license)
display(coco_license_dict2)

assert coco_license.is_valid()

# The skip-nulls export must be equal to the dict the object was built from.
assert coco_license_dict2 == coco_license_dict, coco_license_dict2
# The element reports 'licenses' as the collection it belongs to.
assert coco_license.collection_name == 'licenses'

# --
# test minimal required fields
# Only 'id' and 'name' are supplied; 'url' is left out.
# NOTE(review): the signature annotates `id` as str, but an int is passed on both sides
# of the comparison — confirm whether int ids are meant to be supported.
assert CocoLicense.from_dict({'id': 2, 'name': 'Apache 2.0'}) == CocoLicense(id=2, name='Apache 2.0')
CocoLicense(id='1', name='Attribution-NonCommercial-NoDerivatives 4.0 International', url='https://creativecommons.org/licenses/by-nc-nd/4.0/')
{'id': '1',
 'name': 'Attribution-NonCommercial-NoDerivatives 4.0 International',
 'url': 'https://creativecommons.org/licenses/by-nc-nd/4.0/'}

class CocoImage[source]

CocoImage(id:str, coco_url:str, width:Optional[int]=None, height:Optional[int]=None, license:Optional[int]=None, file_name:Optional[str]=None, flickr_url:Optional[str]=None, date_captured:Optional[str]=None) :: CocoElement

CocoImage(id:str, coco_url:str, width:Union[int, NoneType]=None, height:Union[int, NoneType]=None, license:Union[int, NoneType]=None, file_name:Union[str, NoneType]=None, flickr_url:Union[str, NoneType]=None, date_captured:Union[str, NoneType]=None)

# Round-trip check: build a CocoImage from a plain dict and export it back.
# 'flickr_url' is not provided in the input.
coco_image_dict = {
 'id': '204800',
 'license': 1,
 'coco_url': 'https://outforz.s3.amazonaws.com/media/public/content/2021/01/10/e2e76667-f7e.jpg',
 'width': 1920,
 'height': 2560,
 'file_name': 'e2e76667-f7e.jpg',
 'date_captured': '2021-01-05 13:18:13',
}


coco_image = CocoImage.from_dict(coco_image_dict)
display(coco_image)

assert coco_image.is_valid()

# The skip-nulls export must be equal to the input dict (dict equality ignores key order).
assert coco_image.to_dict_skip_nulls() == coco_image_dict, coco_image.to_dict_skip_nulls()
# The element reports 'images' as the collection it belongs to.
assert coco_image.collection_name == 'images'

# --
# test minimal required fields
# Only 'id' and 'coco_url' are supplied; every other field is left out.
# NOTE(review): the signature annotates `id` as str, but an int is passed here — confirm.
assert CocoImage.from_dict({'id': 2, 'coco_url': 'http://image'}) == CocoImage(id=2, coco_url='http://image')

# With no file_name given, get_file_name() returns 'keks.jpg' here,
# i.e. the last path segment of coco_url.
assert CocoImage(id=1, coco_url='http://abc.com/keks.jpg').get_file_name() == 'keks.jpg'
CocoImage(id='204800', coco_url='https://outforz.s3.amazonaws.com/media/public/content/2021/01/10/e2e76667-f7e.jpg', width=1920, height=2560, license=1, file_name='e2e76667-f7e.jpg', flickr_url=None, date_captured='2021-01-05 13:18:13')

class CocoAnnotation[source]

CocoAnnotation(id:str, image_id:str) :: CocoElement

CocoAnnotation(id:str, image_id:str)

class CocoObjectDetectionAnnotation[source]

CocoObjectDetectionAnnotation(id:str, image_id:str, category_id:str, bbox:Optional[Tuple[int, Ellipsis]], supercategory:Optional[str]=None, area:Optional[int]=None, iscrowd:Optional[int]=None) :: CocoAnnotation

CocoObjectDetectionAnnotation(id:str, image_id:str, category_id:str, bbox:Union[Tuple[int, ...], NoneType], supercategory:Union[str, NoneType]=None, area:Union[int, NoneType]=None, iscrowd:Union[int, NoneType]=None)

# is_valid() depends on the bbox: three cases are checked on the same annotation.

# Case 1: an empty bbox is reported as invalid.
c = CocoObjectDetectionAnnotation(id=1, image_id=2, category_id=3, bbox=[])
assert not c.is_valid()

# Case 2: a bbox containing a negative value is reported as invalid.
c = replace(c, bbox=[1, -2, 3, 4])
assert not c.is_valid()

# Case 3: four positive values are reported as valid.
c = replace(c, bbox=[1, 2, 3, 4])
assert c.is_valid()

class CocoCategory[source]

CocoCategory(id:str) :: CocoElement

CocoCategory(id:str)

class CocoObjectDetectionCategory[source]

CocoObjectDetectionCategory(id:str, name:str, supercategory:Optional[str]=None) :: CocoCategory

CocoObjectDetectionCategory(id:str, name:str, supercategory:Union[str, NoneType]=None)

# get_dir_name() for a category with a non-ASCII name containing spaces and punctuation.
c = CocoObjectDetectionCategory(id='12345678', name='Бреф Кольорова вода Евкаліпт НОВИНКА!!! 50 г')
assert c.is_valid()
a = c.get_dir_name()
# Judging by the expected value: Cyrillic letters and digits are kept, each run of spaces
# and punctuation becomes a single '_', and '--<id>' is appended.
assert a == 'Бреф_Кольорова_вода_Евкаліпт_НОВИНКА_50_г--12345678', a

class CocoDataset[source]

CocoDataset(annotations:List[CocoAnnotation]=<factory>, images:List[CocoImage]=<factory>, info:CocoInfo=CocoInfo(year=None, version=None, description=None, contributor=None, url=None, date_created=None), licenses:List[CocoLicense]=<factory>) :: CocoElement

CocoDataset(annotations:List[cocorepr.coco.CocoAnnotation]=, images:List[cocorepr.coco.CocoImage]=, info:cocorepr.coco.CocoInfo=CocoInfo(year=None, version=None, description=None, contributor=None, url=None, date_created=None), licenses:List[cocorepr.coco.CocoLicense]=)

# Split of CocoDataset fields into single-object fields and list-valued collections.
non_col = CocoDataset.get_non_collective_elements()
col = CocoDataset.get_collective_elements()

# 'info' is the only non-collective element; the collections come back in this exact order.
assert non_col == ['info'], non_col
assert col == ['annotations', 'images', 'licenses'], col

class CocoObjectDetectionDataset[source]

CocoObjectDetectionDataset(annotations:List[CocoObjectDetectionAnnotation]=<factory>, images:List[CocoImage]=<factory>, info:CocoInfo=CocoInfo(year=None, version=None, description=None, contributor=None, url=None, date_created=None), licenses:List[CocoLicense]=<factory>, categories:List[CocoObjectDetectionCategory]=<factory>) :: CocoDataset

CocoObjectDetectionDataset(annotations:List[cocorepr.coco.CocoObjectDetectionAnnotation]=, images:List[cocorepr.coco.CocoImage]=, info:cocorepr.coco.CocoInfo=CocoInfo(year=None, version=None, description=None, contributor=None, url=None, date_created=None), licenses:List[cocorepr.coco.CocoLicense]=, categories:List[cocorepr.coco.CocoObjectDetectionCategory]=)

# Same split for the object-detection dataset, which adds the 'categories' collection.
non_col = CocoObjectDetectionDataset.get_non_collective_elements()
col = CocoObjectDetectionDataset.get_collective_elements()

# The expected collection list is in alphabetical order, with 'categories' included.
assert non_col == ['info'], non_col
assert col == ['annotations', 'categories', 'images', 'licenses'], col
# Build a small object-detection dataset directly from the dataclasses:
# one image, one annotation on that image, one category; 'licenses' is left at its default.
dataset = CocoObjectDetectionDataset(
    info=CocoInfo(year=2017, version='1.0', description='COCO 2017 Dataset', contributor='COCO Consortium', url='http://cocodataset.org', date_created='2017/09/01'),
    images=[CocoImage(id='362343', coco_url='http://image')], 
    annotations=[
      # NOTE(review): bbox is given as floats although the signature declares Tuple[int, ...];
      # the cell output shows the values as ints (196, 254, 9, 23) — confirm the truncation is intended.
      CocoObjectDetectionAnnotation(id='402717', image_id='362343', category_id='10', bbox=(196.7, 254.52, 9.89, 23.19), iscrowd=0),
    ],
    categories=[CocoObjectDetectionCategory(id='10', name="person")],
)

assert dataset.is_valid()
# Last expression of the cell: its value is shown as the cell output.
dataset.to_dict_skip_nulls()
{'annotations': [{'id': '402717',
   'image_id': '362343',
   'category_id': '10',
   'bbox': (196, 254, 9, 23),
   'iscrowd': 0}],
 'images': [{'id': '362343', 'coco_url': 'http://image'}],
 'info': {'year': 2017,
  'version': '1.0',
  'description': 'COCO 2017 Dataset',
  'contributor': 'COCO Consortium',
  'url': 'http://cocodataset.org',
  'date_created': '2017/09/01'},
 'licenses': [],
 'categories': [{'id': '10', 'name': 'person'}]}
# Round-trip the dataset through its dict form and require an equal object back.
dataset2 = CocoObjectDetectionDataset.from_dict(dataset.to_dict())
# Use the two objects themselves as the assertion message. `display(...)` returns None,
# so using it as the message would raise `AssertionError: None` and lose the values
# from the error itself.
assert dataset2 == dataset, (dataset2, dataset)
# Summary string of the dataset; the cell output shows the class name followed by
# the number of elements in each collection.
dataset.to_full_str()
'CocoObjectDetectionDataset(annotations=1, categories=1, images=1, licenses=0)'
# Build a dataset straight from a raw dict; the 'licenses' key is absent from the input.
d = {'info': {'year': 2017,
  'version': '1.0',
  'description': 'COCO 2017 Dataset',
  'contributor': 'COCO Consortium',
  'url': 'http://cocodataset.org',
  'date_created': '2017/09/01'},
 'images': [
     {'id': '204800',
      'width': 1920,
      'height': 2560,
      'file_name': 'e2e76667-f7e.jpg',
      'license': 1,
      'coco_url': 'https://e2e76667-f7e.jpg',
      'date_captured': '2021-01-05 13:18:13'},
 ],
 # NOTE(review): this annotation points at image_id '362343' and category_id '10', neither of
 # which exists in this dict; the cell output shows from_dict() building the object anyway,
 # so cross-references do not appear to be checked at this stage — confirm.
 'annotations': [{'id': '402717',
   'image_id': '362343',
   'category_id': '10',
   # Float bbox values; the cell output shows them as ints (196, 254, 9, 23).
   'bbox': (196.7, 254.52, 9.89, 23.19),
   'iscrowd': 0}],
    'categories':[]}

# Last expression of the cell: the constructed dataset is shown as the cell output.
CocoObjectDetectionDataset.from_dict(d)
CocoObjectDetectionDataset(annotations=[CocoObjectDetectionAnnotation(id='402717', image_id='362343', category_id='10', bbox=(196, 254, 9, 23), supercategory=None, area=None, iscrowd=0)], images=[CocoImage(id='204800', coco_url='https://e2e76667-f7e.jpg', width=1920, height=2560, license=1, file_name='e2e76667-f7e.jpg', flickr_url=None, date_captured='2021-01-05 13:18:13')], info=CocoInfo(year=2017, version='1.0', description='COCO 2017 Dataset', contributor='COCO Consortium', url='http://cocodataset.org', date_created='2017/09/01'), licenses=[], categories=[])
# Check which keys CocoImage.from_dict() requires by removing keys from `raw` one at a time.
# The steps below mutate `raw` in place, so their order matters.
raw = {
    "id": '204800',
    "license": 1,
    "coco_url": "https://e2e76667-f7e.jpg",
    "width": 1920,
    "height": 2560,
    "file_name": "e2e76667-f7e.jpg",
    "date_captured": "2021-01-05 13:18:13"
}
# Step 1: every key is present.
display(CocoImage.from_dict(raw))
# Step 2: 'date_captured' is removed; the object is still built (date_captured=None in the output).
del raw["date_captured"]
display(CocoImage.from_dict(raw))
# Step 3: 'id' is removed; from_dict() is expected to raise KeyError.
del raw["id"]
try:
    display(CocoImage.from_dict(raw))
except KeyError:
    pass
else:
    # Reached only if from_dict() accepted a dict without 'id'.
    assert False, "no exception"
CocoImage(id='204800', coco_url='https://e2e76667-f7e.jpg', width=1920, height=2560, license=1, file_name='e2e76667-f7e.jpg', flickr_url=None, date_captured='2021-01-05 13:18:13')
CocoImage(id='204800', coco_url='https://e2e76667-f7e.jpg', width=1920, height=2560, license=1, file_name='e2e76667-f7e.jpg', flickr_url=None, date_captured=None)

get_dataset_class[source]

get_dataset_class(coco_kind:str)

# Look up the dataset class by kind name; the cell output shows that
# "object_detection" resolves to CocoObjectDetectionDataset.
get_dataset_class("object_detection")
__main__.CocoObjectDetectionDataset

merge_datasets[source]

merge_datasets(d1:CocoDataset, d2:CocoDataset, update:bool=False)

# Raw-dict fixtures for the merge_datasets examples.
# d2 repeats image '1' and annotation '10' from d1 with identical content and adds
# image '2', annotation '11' and category '2'.
# Every element is spelled out as its own literal so that d1 and d2 share no objects
# (mutating one fixture must never affect the other).
d1 = {
    'info': {},
    'licenses': [],
    'images': [{'id': '1', 'coco_url': 'https://image1.jpg'}],
    'annotations': [
        {'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': (4, 3, 2, 1)},
    ],
    'categories': [{'id': '1', 'name': 'human'}],
}

d2 = {
    'info': {},
    'licenses': [],
    'images': [
        {'id': '1', 'coco_url': 'https://image1.jpg'},
        {'id': '2', 'coco_url': 'https://image2.jpg'},
    ],
    'annotations': [
        {'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': (4, 3, 2, 1)},
        {'id': '11', 'image_id': '2', 'category_id': '2', 'bbox': (1, 2, 3, 4)},
    ],
    'categories': [{'id': '2', 'name': 'animal'}],
}
# Merge two datasets whose shared elements (image '1', annotation '10') are identical.
d_res = merge_datasets(CocoObjectDetectionDataset.from_dict(d1),
                       CocoObjectDetectionDataset.from_dict(d2))
res = d_res.to_dict_skip_nulls()
display(res)
# Expected: each shared element appears once, and elements found only in d2 follow those of d1.
# Dict equality ignores key order, so the top-level keys may be listed in any order here.
assert res == {'images': [{'id': '1', 'coco_url': 'https://image1.jpg'},
  {'id': '2', 'coco_url': 'https://image2.jpg'}],
 'info': {},
 'licenses': [],
 'annotations': [{'id': '10',
   'image_id': '1',
   'category_id': '1',
   'bbox': (4, 3, 2, 1)},
  {'id': '11', 'image_id': '2', 'category_id': '2', 'bbox': (1, 2, 3, 4)}],
 'categories': [{'id': '1', 'name': 'human'}, {'id': '2', 'name': 'animal'}]}, res
{'annotations': [{'id': '10',
   'image_id': '1',
   'category_id': '1',
   'bbox': (4, 3, 2, 1)},
  {'id': '11', 'image_id': '2', 'category_id': '2', 'bbox': (1, 2, 3, 4)}],
 'images': [{'id': '1', 'coco_url': 'https://image1.jpg'},
  {'id': '2', 'coco_url': 'https://image2.jpg'}],
 'info': {},
 'licenses': [],
 'categories': [{'id': '1', 'name': 'human'}, {'id': '2', 'name': 'animal'}]}
# Create a conflict: annotation '10' now has a different bbox in d2 than in d1.
# This mutates the d2 fixture in place, so later cells see the changed bbox too.
d2['annotations'][0]['bbox'] = (100, 101, 102, 103)
# Sanity check of the setup: same id, different content.
assert d1['annotations'][0]['id'] == d2['annotations'][0]['id'] \
    and d2['annotations'][0]['bbox'] != d1['annotations'][0]['bbox']

# Without update=True, merging conflicting elements is expected to raise ValueError.
try:
    d_res = merge_datasets(CocoObjectDetectionDataset.from_dict(d1),
                           CocoObjectDetectionDataset.from_dict(d2))
except ValueError:
    pass
else:
    # Reached only if the conflicting merge succeeded silently.
    assert False, 'test failed'
# With update=True the conflicting element from the second dataset replaces the one
# from the first: annotation '10' is expected to carry d2's bbox.
expected_merge_update_result = {
    'info': {},
    'licenses': [],
    'images': [
        {'id': '1', 'coco_url': 'https://image1.jpg'},
        {'id': '2', 'coco_url': 'https://image2.jpg'}
    ],
    'annotations': [
        # bbox taken from d2, which was modified before this cell.
        {'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': (100, 101, 102, 103)},
        {'id': '11', 'image_id': '2', 'category_id': '2', 'bbox': (1, 2, 3, 4)}
    ],
    'categories': [
        {'id': '1', 'name': 'human'},
        {'id': '2', 'name': 'animal'}
    ]
}

# The cell output shows an "Updating 'annotations' of id=10" message emitted by this call.
actual_merge_update_result = merge_datasets(
    CocoObjectDetectionDataset.from_dict(d1),
    CocoObjectDetectionDataset.from_dict(d2),
    update=True
).to_dict_skip_nulls()
assert actual_merge_update_result == expected_merge_update_result, actual_merge_update_result
Updating 'annotations' of id=10: '{'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': [4, 3, 2, 1], 'supercategory': None, 'area': None, 'iscrowd': None}' -> '{'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': [100, 101, 102, 103], 'supercategory': None, 'area': None, 'iscrowd': None}'
Updating 'annotations' of id=10: '{'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': [4, 3, 2, 1], 'supercategory': None, 'area': None, 'iscrowd': None}' -> '{'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': [100, 101, 102, 103], 'supercategory': None, 'area': None, 'iscrowd': None}'
# Passing None as the first dataset: the result must be equal to the second dataset.
assert merge_datasets(None, CocoObjectDetectionDataset.from_dict(d2)) == CocoObjectDetectionDataset.from_dict(d2)

shuffle[source]

shuffle(arr)

# shuffle() returns a reordered list; the cell output shows `a` still in its original
# order afterwards, i.e. the input list is not modified in this example.
a = [1,2,3,4,5]
b = shuffle(a)
# Last expression of the cell: both lists are shown as the cell output.
a, b
([1, 2, 3, 4, 5], [5, 1, 3, 4, 2])

cut_annotations_per_category[source]

cut_annotations_per_category(coco:CocoDataset, max_annotations_per_category:int)

Returns a copy of the input dataset in which each category contains at most `max_annotations_per_category` annotations.

# Two categories with two annotations each, limited to one annotation per category.
res = cut_annotations_per_category(
    CocoObjectDetectionDataset.from_dict(
        {'info': {},
         'images': [{'id': '1', 'coco_url': 'https://image1.jpg'}],
         'annotations': [
             # category '1': two annotations
             {'id': '10', 'image_id': '1', 'category_id': '1', 'bbox': (4,3,2,1)},
             {'id': '11', 'image_id': '1', 'category_id': '1', 'bbox': (4,3,2,1)},
             # category '2': two annotations
             {'id': '12', 'image_id': '1', 'category_id': '2', 'bbox': (4,3,2,1)},
             {'id': '13', 'image_id': '1', 'category_id': '2', 'bbox': (4,3,2,1)},
         ],
         'categories': [
             # Both categories share the name 'animal'; only their ids differ.
             {'id': '1', 'name': 'animal'},
             {'id': '2', 'name': 'animal'},
         ]}
    ),
    max_annotations_per_category=1
)
# One annotation per category should remain: 2 in total.
# Only the count is checked, not which annotations were kept.
assert len(res.annotations) == 2, res.to_dict_skip_nulls()

remove_invalid_elements[source]

remove_invalid_elements(coco:CocoDataset)

# Input with a mix of valid and invalid elements. The reasons given in the comments below are
# inferred from the expected result at the end of the cell; where several conditions overlap,
# which one actually triggers the removal should be confirmed against the implementation.
# NOTE: this cell rebinds the names `d`, `c` and `d2` used by earlier cells.
d = {'info': {},
 'images': [{'id': '1', 'coco_url': 'https://image1.jpg'},      # kept
            {'id': '', 'coco_url': 'https://image2.jpg'},       # removed: empty id
            {'id': '2', 'coco_url': ''},                        # removed: empty coco_url
            {'id': '3','coco_url': 'https://valid-but-unused'}, # removed: no annotation refers to it
           ],
 'annotations': [
     # kept: refers to image '1' and category '1', both of which are kept
     {'id': '10',
      'image_id': '1',
      'category_id': '1',
      'bbox': (4,3,2,1)},
     # removed: empty id (and it refers to image '2', which is removed)
     {'id': '',
       'image_id': '2',
       'category_id': '1',
       'bbox': (1,2,3,4)},
     # kept: a non-numeric id is accepted
     {'id': 'ANN-01',
       'image_id': '1',
       'category_id': '1',
       'bbox': (1,2,3,4)},
     # removed: empty category_id (and it refers to image '2', which is removed)
     {'id': '3',
       'image_id': '2',
       'category_id': '',
       'bbox': (1,2,3,4)},
     # removed: bbox contains a 0; it also refers to removed image '2' and to category '2',
     # which is not defined
     {'id': '4',
       'image_id': '2',
       'category_id': '2',
       'bbox': (1,2,3,0)},
     # removed: bbox contains a negative value; it also refers to removed image '2' and to
     # category '2', which is not defined
     {'id': '5',
       'image_id': '2',
       'category_id': '2',
       'bbox': (1,2,-2,4)},
 ],
 'categories': [
     {'id': '1', 'name': 'animal'},  # kept
     {'id': '', 'name': 'nobody'},   # removed: empty id
 ]}

c = CocoObjectDetectionDataset.from_dict(d)
c2 = remove_invalid_elements(c)
d2 = c2.to_dict_skip_nulls()
# Only the valid, mutually consistent elements remain; 'licenses' shows up as an empty
# list although the input dict had no such key.
assert d2 == {
    'annotations': [
      {
        'id': '10',
        'image_id': '1',
        'category_id': '1',
        'bbox': (4, 3, 2, 1)
      },
      {
        'id': 'ANN-01',
        'image_id': '1',
        'category_id': '1',
        'bbox': (1,2,3,4)
      },
    ],
    'images': [
         {'id': '1', 'coco_url': 'https://image1.jpg'}],
     'info': {},
     'licenses': [],
     'categories': [{'id': '1', 'name': 'animal'}],
}, d2