@@ -114,10 +114,171 @@ You can find more information annotation format conversion :ref:`here <ref_conve
114114 )
115115
116116
117+ Converting CSV and JSONL Formats for Annotation Management in SuperAnnotate
118+ ---------------------------------------------------------------------------
119+ SuperAnnotate primarily uses the **JSONL format** for annotation import/export. However,
120+ many external tools use **CSV**, requiring users to convert between these formats for seamless data management.
121+
122+ This guide provides:
123+
124+ - **CSV to JSONL conversion** for annotation uploads.
125+ - **Fetching annotations from SuperAnnotate** and converting them into JSONL/CSV.
126+ - **Correct metadata mappings** to ensure consistency in the annotation format.
127+
128+
129+ SuperAnnotate JSONL Schema Overview
130+ ===================================
131+ Before diving into conversions, here's a breakdown of SuperAnnotate's JSONL schema:
132+
133+ .. code-block:: json
134+
135+ {
136+   "metadata": {
137+     "name": "sample_image.jpg",
138+     "item_category": {"value": "category1"},
139+     "folder_name": "dataset_folder"
140+   },
141+   "data": {
142+     "attribute1": {"value": "label1"},
143+     "attribute2": {"value": "label2"}
144+   }
145+ }
146+
147+ Key Fields:
148+ - **metadata.name** → The item's name (e.g., image file name).
149+ - **metadata.item_category** → Optional category assigned to the item.
150+ - **metadata.folder_name** → The dataset folder name (corresponds to the `_folder` column in CSV).
151+ - **data** → Stores key-value pairs for attributes.
152+
153+
154+ Converting CSV to JSONL and Uploading Annotations
155+ =================================================
156+
157+ Steps to Convert CSV to JSONL
158+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
159+
160+ 1. Read the **CSV file** and extract annotation fields.
161+ 2. Map metadata (`_item_name`, `_item_category`, `_folder`) to **SuperAnnotate's JSONL format**.
162+ 3. Convert remaining fields into JSONL **data attributes**.
163+ 4. Upload the JSONL file to **SuperAnnotate using SAClient**.
164+
165+ Example Python Script:
166+
167+ .. code-block:: python
168+
169+ import csv
170+ import json
171+ from pathlib import Path
172+ from superannotate import SAClient
173+
def _decode_cell(cell):
    """Return *cell* decoded as JSON, or unchanged if it is not valid JSON."""
    try:
        return json.loads(cell)
    except (TypeError, ValueError):
        # Plain text such as ``label1`` (or an empty cell) is not JSON;
        # keep the value exactly as it was read from the CSV.
        return cell


def csv_to_jsonl(csv_path, jsonl_path):
    """Convert CSV annotations to JSONL format with correct mappings.

    Each CSV row becomes one JSON object on its own line. The ``_item_name``,
    ``_item_category`` and ``_folder`` columns are mapped into ``metadata``;
    every other column is stored under ``data`` as ``{"value": ...}``.

    Cells holding valid JSON (numbers, booleans, lists, objects) are decoded;
    any other cell is kept as the plain string read from the CSV.

    Args:
        csv_path: Path of the CSV file to read.
        jsonl_path: Path of the JSONL file to write (overwritten if present).

    Raises:
        KeyError: If a row lacks one of the three metadata columns.
    """
    metadata_columns = ("_item_name", "_item_category", "_folder")

    # newline="" lets the csv module handle quoted, embedded line breaks.
    with open(csv_path, newline="", encoding="utf-8") as csv_file, \
            open(jsonl_path, "w", encoding="utf-8") as jsonl_file:
        for row in csv.DictReader(csv_file):
            jsonl_entry = {
                "metadata": {
                    "name": row["_item_name"],
                    "item_category": {"value": row["_item_category"]},
                    "folder_name": row["_folder"],
                },
                "data": {
                    key: {"value": _decode_cell(value)}
                    for key, value in row.items()
                    if key not in metadata_columns
                },
            }
            json.dump(jsonl_entry, jsonl_file)
            jsonl_file.write("\n")
195+
# Convert CSV to JSONL
csv_to_jsonl("annotations.csv", "annotations.jsonl")

# Upload to SuperAnnotate
sa = SAClient()

# Read the JSONL back inside a ``with`` block so the file handle is closed,
# skipping blank lines that would otherwise fail to parse as JSON.
with Path("annotations.jsonl").open("r", encoding="utf-8") as jsonl_file:
    annotations = [json.loads(line) for line in jsonl_file if line.strip()]

response = sa.upload_annotations(
    project="project1/folder1",
    annotations=annotations,
    keep_status=True,
    data_spec="multimodal",
)
209+
210+
211+ Fetching Annotations and Converting to JSONL/CSV
212+ ================================================
213+
214+ Steps to Retrieve and Convert Annotations:
215+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
216+
217+ 1. Fetch **annotations from SuperAnnotate** using `sa.get_annotations()`.
218+ 2. Convert the **annotation list into JSONL format**.
219+ 3. Convert the **JSONL data into CSV** for external use.
220+
221+ Python Script to Convert Annotations to JSONL:
222+
223+ .. code-block:: python
224+
def convert_annotations_to_jsonl(annotations, jsonl_path):
    """Convert SuperAnnotate annotations list to JSONL format.

    Writes one JSON document per line, in the order given.

    Args:
        annotations: Iterable of JSON-serializable annotation objects.
        jsonl_path: Path of the JSONL file to write (overwritten if present).
    """
    with open(jsonl_path, "w", encoding="utf-8") as jsonl_file:
        for annotation in annotations:
            # json.dumps never emits a raw newline, so each annotation
            # occupies exactly one line of the output file.
            jsonl_file.write(json.dumps(annotation) + "\n")
231+
# Fetch annotations from SuperAnnotate
sa = SAClient()
annotations = sa.get_annotations("project", data_spec="multimodal")

# Convert to JSONL
convert_annotations_to_jsonl(annotations, "fetched_annotations.jsonl")
238+
239+ Python Script to Convert JSONL to CSV:
240+
241+ .. code-block:: python
242+
def convert_jsonl_to_csv(jsonl_path, csv_path):
    """Convert JSONL file to CSV format with correct mappings.

    ``metadata.name``, ``metadata.item_category.value`` and
    ``metadata.folder_name`` are written to the ``_item_name``,
    ``_item_category`` and ``_folder`` columns. Every key found under
    ``data`` in any entry becomes an additional column; entries that lack
    a given key get an empty cell.

    List and object values are written as JSON text so they can be decoded
    again with ``json.loads``.

    Args:
        jsonl_path: Path of the JSONL file to read.
        csv_path: Path of the CSV file to write (overwritten if present).
            The file is left empty when the JSONL file has no entries.

    Raises:
        KeyError: If an entry has no ``metadata`` or no ``metadata.name``.
    """
    with open(jsonl_path, "r", encoding="utf-8") as jsonl_file, \
            open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        # Blank lines are tolerated rather than treated as invalid JSON.
        entries = [json.loads(line) for line in jsonl_file if line.strip()]

        if not entries:
            return

        # Collect columns from every entry (not just the first) so that an
        # attribute appearing only in later entries does not make DictWriter
        # raise ValueError. A dict keeps first-seen order and de-duplicates.
        columns = dict.fromkeys(["_item_name", "_item_category", "_folder"])
        for entry in entries:
            for key in entry.get("data") or {}:
                columns.setdefault(key)

        writer = csv.DictWriter(csv_file, fieldnames=list(columns))
        writer.writeheader()

        for entry in entries:
            metadata = entry["metadata"]
            # item_category may be absent or explicitly null.
            category = metadata.get("item_category") or {}
            row = {
                "_item_name": metadata["name"],
                "_item_category": category.get("value"),
                "_folder": metadata.get("folder_name"),
            }

            for key, value in (entry.get("data") or {}).items():
                cell = value["value"] if isinstance(value, dict) else value
                if isinstance(cell, (dict, list)):
                    # Emit JSON, not Python repr, for nested values.
                    cell = json.dumps(cell)
                row[key] = cell

            writer.writerow(row)
269+
# Convert JSONL to CSV
convert_jsonl_to_csv("fetched_annotations.jsonl", "converted_annotations.csv")
272+
273+ Conclusion
274+ ==========
275+ This guide provides a **seamless way to convert** annotations between CSV and JSONL formats while maintaining
276+ compatibility with **SuperAnnotate's platform**.
277+ By following these steps, users can efficiently **import, export, and manage annotation data** in their projects.
278+
117279pandas DataFrame out of project annotations and annotation instance filtering
118280-----------------------------------------------------------------------------
119281
120-
121282To create a `pandas DataFrame <https://pandas.pydata.org/ >`_ from project
122283SuperAnnotate format annotations:
123284
0 commit comments