Car Data Parsing Guide
Best practices for working with Carapis car-listing data.
Data structure
Listing object
Every marketplace returns the same normalized vehicle record:
{
"id": "encar_38291043",
"source": "encar",
"make": "Hyundai",
"model": "Grandeur",
"trim": "Calligraphy",
"year": 2022,
"mileage": 41000, # kilometers
"price": 34500000, # local currency (KRW here)
"priceHistory": [
{"date": "2024-10-01", "price": 36000000},
{"date": "2024-10-21", "price": 34500000},
],
"fuelType": "gasoline",
"transmission": "automatic",
"inspectionSheet": {...}, # structured condition record (where available)
"accidentHistory": "none",
"options": ["sunroof", "heated_seats", "adaptive_cruise"],
"photos": ["url1", "url2", ...],
"dealer": {"name": "...", "location": "Seoul"},
"url": "https://www.encar.com/...",
}
The same field names hold whether the data came from Encar, Mobile.de or AutoTrader — the normalization is done for you.
Error handling
Handling missing data
Marketplaces differ in which fields they expose. Always check before using a value — guard each direct access and fall back to an explicit default rather than assuming a field is present:
# Parse the JSON body of the response into the listing record.
car = resp.json()

# Start from the default and override it only when the field holds a value,
# so both an absent key and an explicit null end up as 0.
mileage = 0
if car.get("mileage") is not None:
    mileage = car["mileage"]

price = 0
if car.get("price") is not None:
    price = car["price"]
sheet = car.get("inspectionSheet") # None on marketplaces without one
Try-except patterns
Handle API errors gracefully:
import requests
# Fetch one listing and report failures instead of letting them propagate.
# `car` is always bound afterwards: the listing on success, None on failure.
try:
    resp = requests.get(
        f"https://api.carapis.com/v2/listings/{listing_id}",
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=10,  # without a timeout the call can block indefinitely
    )
    resp.raise_for_status()
    car = resp.json()
except requests.HTTPError as e:
    # raise_for_status() attaches the failing response to the exception.
    car = None
    status = e.response.status_code
    if status == 404:
        print(f"Listing not found: {listing_id}")
    elif status == 429:
        print("Rate limit exceeded, backing off...")
    else:
        print(f"API error: {e}")
except requests.RequestException as e:
    # Connection failures and timeouts carry no HTTP status to branch on.
    car = None
    print(f"Request failed: {e}")

# Retry logic
Implement exponential backoff for transient failures:
import time
import requests
def get_listing_with_retry(listing_id, max_retries=3):
    """Fetch one listing, retrying transient failures with exponential backoff.

    Connection errors, timeouts, HTTP 429 and HTTP 5xx responses are retried.
    Any other HTTP error (for example 404) is raised immediately, because
    repeating the same request cannot change the outcome.

    Args:
        listing_id: Identifier of the listing to fetch.
        max_retries: Total number of attempts, including the first one.

    Returns:
        The decoded JSON body of the listing response.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        requests.HTTPError: On a non-retryable status, or when the last
            attempt still returns an error status.
        requests.ConnectionError, requests.Timeout: When the last attempt
            still fails at the network level.
    """
    if max_retries < 1:
        # range(0) would skip the loop and silently return None.
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            resp = requests.get(
                f"https://api.carapis.com/v2/listings/{listing_id}",
                headers={"Authorization": f"Bearer {API_KEY}"},
                timeout=10,  # bound each attempt so a stalled socket is retried
            )
            resp.raise_for_status()
            return resp.json()
        except requests.HTTPError as e:
            status = e.response.status_code
            retryable = status == 429 or status >= 500
            if not retryable or is_last_attempt:
                raise
        except (requests.ConnectionError, requests.Timeout):
            if is_last_attempt:
                raise
        # Backoff doubles each time: 1s, 2s, 4s, ... There is no wait after
        # the final attempt, so the default of 3 attempts waits 1s then 2s.
        time.sleep(2 ** attempt)

# Data validation
Validate listing data
Check data quality before using it downstream:
def validate_listing(car):
    """Report whether a listing record is plausible enough to use downstream.

    Checks performed:
      * ``price`` must be a positive number.
      * ``year`` must be a number from 1950 up to next calendar year.
      * ``mileage`` is optional; when present it must be a non-negative number.

    Values of the wrong type (for example a price delivered as a string) are
    reported as invalid rather than raising ``TypeError``.

    Args:
        car: Listing record as a dict; missing keys are treated as absent
            values.

    Returns:
        True when every check passes. Otherwise the problems are printed to
        stdout together with the listing ``id`` and False is returned.
    """
    from datetime import date  # local import keeps the snippet self-contained

    def is_number(value):
        # bool is a subclass of int, but True/False are never real measurements.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    # Derive the upper bound from today's date so the check does not go stale;
    # the extra year leaves room for next-model-year vehicles.
    newest_year = date.today().year + 1

    errors = []

    price = car.get("price")
    if not is_number(price) or price <= 0:
        errors.append("invalid price")

    year = car.get("year")
    if not is_number(year) or not (1950 <= year <= newest_year):
        errors.append("invalid year")

    mileage = car.get("mileage")
    if mileage is not None:
        if not is_number(mileage):
            errors.append("invalid mileage")
        elif mileage < 0:
            errors.append("negative mileage")

    if errors:
        print(f"Validation errors for {car.get('id')}: {', '.join(errors)}")
        return False
    return True

# Data cleaning
Normalize before persisting:
def clean_listing(car):
    """Build a normalized copy of a listing holding only the persisted fields.

    ``make`` and ``model`` are stripped of surrounding whitespace; ``year``,
    ``mileage`` and ``price`` are converted to ``int``. A null or absent
    ``make``/``model``/``year``/``mileage``/``price`` becomes ``None`` instead
    of raising, since not every marketplace exposes every field. A ``year`` or
    ``price`` of 0 is also stored as ``None``.

    ``price_per_km`` is added only when both price and mileage are non-zero.

    Args:
        car: Listing record as a dict. ``id`` and ``source`` are required.

    Returns:
        A new dict; the input record is not modified.

    Raises:
        KeyError: If ``id`` or ``source`` is missing.
    """
    def stripped(value):
        # Leave None (or any non-text value) untouched rather than crashing.
        return value.strip() if isinstance(value, str) else value

    cleaned = {
        "id": car["id"],
        "source": car["source"],
        "make": stripped(car.get("make")),
        "model": stripped(car.get("model")),
        "year": int(car["year"]) if car.get("year") else None,
        "mileage": int(car["mileage"]) if car.get("mileage") is not None else None,
        "price": int(car["price"]) if car.get("price") else None,
    }
    # The truthiness test also skips mileage == 0, avoiding division by zero.
    if cleaned["price"] and cleaned["mileage"]:
        cleaned["price_per_km"] = cleaned["price"] / cleaned["mileage"]
    return cleaned

# Batch processing
Process many listings
Pull listings in bulk, then validate and clean each:
import requests
def fetch_and_process(source, **filters):
    """Fetch listings from one marketplace and return the valid ones, cleaned.

    Args:
        source: Marketplace identifier sent as the ``source`` query parameter.
        **filters: Extra query parameters passed through unchanged. A
            ``limit`` given here replaces the default of 500.

    Returns:
        A list with the ``clean_listing`` result of every listing in the
        response's ``results`` that passes ``validate_listing``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    resp = requests.get(
        "https://api.carapis.com/v2/listings",
        params={"source": source, "limit": 500, **filters},
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=30,  # without a timeout the call can block indefinitely
    )
    # Fail here on an error response; otherwise the error payload would only
    # surface below as a confusing KeyError on "results".
    resp.raise_for_status()
    return [
        clean_listing(car)
        for car in resp.json()["results"]
        if validate_listing(car)
    ]
cars = fetch_and_process("encar", make="Kia", year_min=2020)
print(f"Processed {len(cars)} clean listings")
Data export
Export to CSV
import csv
def export_to_csv(cars, filename):
    """Write listings to a CSV file with a header row.

    Only the columns id, source, make, model, year, mileage and price are
    written; any other keys in a listing are ignored.

    Args:
        cars: Iterable of listing dicts.
        filename: Path of the file to create or overwrite.
    """
    fields = ["id", "source", "make", "model", "year", "mileage", "price"]
    # Explicit UTF-8 so non-ASCII text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    # newline="" leaves line-ending handling to the csv module.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(cars)
export_to_csv(cars, "listings.csv")
Export to JSON
import json
# ensure_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened as UTF-8 explicitly; a platform default encoding that cannot
# represent them would raise UnicodeEncodeError.
with open("listings.json", "w", encoding="utf-8") as f:
    json.dump(cars, f, indent=2, ensure_ascii=False)

# Performance optimization
Caching
Cache listings that don’t change minute-to-minute:
from functools import lru_cache
@lru_cache(maxsize=2000)
def get_listing_cached(listing_id):
    """Return a listing, reusing the earlier result for the same ``listing_id``.

    Results for up to 2000 distinct ids are kept and evicted
    least-recently-used. Entries never expire by age, so call
    ``get_listing_cached.cache_clear()`` when fresh data is required.

    A cache hit returns the very same object as the first call; treat the
    result as read-only, or copy it before modifying it.
    """
    return get_listing_with_retry(listing_id)

# Rate-limit management
Respect your plan’s per-minute limits:
import time
def rate_limited(listing_ids, requests_per_minute=100):
    """Yield the listing for each id, pausing between consecutive lookups.

    The pause is ``60 / requests_per_minute`` seconds. It is inserted only
    between lookups, so nothing is waited before the first item or after the
    last one.

    Args:
        listing_ids: Iterable of listing identifiers.
        requests_per_minute: Maximum lookup rate; must be positive.

    Yields:
        The result of ``get_listing_cached`` for each id, in input order.

    Raises:
        ValueError: If ``requests_per_minute`` is not positive. Because this
            is a generator, the error is raised when iteration starts.
    """
    if requests_per_minute <= 0:
        raise ValueError("requests_per_minute must be positive")
    delay = 60.0 / requests_per_minute
    for position, listing_id in enumerate(listing_ids):
        if position:
            time.sleep(delay)
        yield get_listing_cached(listing_id)

# Quick start
import requests
API_KEY = "your_carapis_key"

# Ask for up to 20 Genesis listings from Encar.
resp = requests.get(
    "https://api.carapis.com/v2/listings",
    params={"source": "encar", "make": "Genesis", "limit": 20},
    headers={"Authorization": f"Bearer {API_KEY}"},
    timeout=30,  # without a timeout the call can block indefinitely
)
# Stop on an error response (for example an invalid key) instead of failing
# later with a KeyError on "results".
resp.raise_for_status()
for car in resp.json()["results"]:
if validate_listing(car):
print(f"{car['year']} {car['make']} {car['model']} — {car['price']:,}")
For the complete API surface, see the API reference. To pick the right marketplaces for your use case, see choosing car data sources.