from typing import Any, Self
from collections.abc import Iterator, Iterable
from functools import singledispatchmethod
import json
from json.decoder import JSONDecodeError
from ast import literal_eval
from pandas import Series, DataFrame
from .exceptions import ParseError, SchemaError, CastError, ValidationErrors
from .jsonobject import SchemaMeta, JsonObject
type Json = dict[str, Any]
type Record = str | bytes | bytearray | Json | Series | JsonObject | None
type Records = str | bytes | bytearray | DataFrame | Record | Iterable[Record]
# ToDo: Add polar-rs support!
[docs]
class JsonObjects[T]:
"""List-like container for JSON-serializable dictionaries.
This class is not meant to ever be instantiated directly. Rather,
inherit from it, and specify a subclass of ``JsonObject`` with the
`item_type` class keyword on sub-classing.
Parameters
----------
items : list, optional
List of JSON-serializable dictionaries. Defaults to an empty list.
*args
Additionally, any number of JSON-serializable dictionaries with the
schema specified by the `item_type` can be provided. These will be
appended to the `items`.
Raises
------
ParseError
If the constructor argument(s) can not be parsed as a list of
dictionaries.
SchemaError
If `item_type` is not a subclass of ``JsonObject``.
Warnings
--------
This class is rather heavy, so do not use it to, e.g., wrap JSON payloads
in high-throughput low-latency web services!
"""
def __init__(self, items: Records | Self = (), *args: Record) -> None:
parsed = self.__parse(items)
itemized = self.__itemize(parsed)
self.__items = self.__wrap([*itemized, *args])
def __init_subclass__(cls, **kwargs: Any) -> None:
try:
item_type = kwargs.pop('item_type')
except KeyError:
try:
item_type = cls.mro()[1].__item_type__
except AttributeError as error:
msg = ('If not inherited, an "item_type" must be defined as'
'a keyword argument in the class call on definition!')
raise SchemaError(msg) from error
cls.__item_type__ = cls.__class_checked(item_type)
super().__init_subclass__(**kwargs)
def __str__(self) -> str:
return json.dumps(self.__items, default=lambda obj: obj.as_json)
def __repr__(self) -> str:
return json.dumps(self.__items, indent=4, default=lambda o: o.as_json)
def __iter__(self) -> Iterator[T]:
return self.__items.__iter__()
def __reversed__(self) -> Self:
return self.__class__(reversed(self.__items))
def __len__(self) -> int:
return self.__items.__len__()
def __getattr__(self, key: str) -> list:
missing = object()
values = [item.get(key, missing) for item in self.__items]
if all(value is missing for value in values):
cls = self.__class__.__name__
msg = f"'{cls}' object has no attribute '{key}'"
raise AttributeError(msg)
return [None if value is missing else value for value in values]
@singledispatchmethod
def __getitem__(self, index: int) -> T:
return self.__items[index]
@__getitem__.register
def _(self, index: slice) -> Self:
return self.__class__(*self.__items[index])
@__getitem__.register
def _(self, key: str) -> list:
missing = object()
values = [item.get(key, missing) for item in self.__items]
if all(value is missing for value in values):
raise KeyError(key)
return [None if value is missing else value for value in values]
def __bool__(self) -> bool:
return bool(self.__items)
def __contains__(self, other: Record) -> bool:
try:
return self.__item_type__(other) in self.__items
except (ParseError, CastError, ValidationErrors):
return False
__hash__ = None
def __eq__(self, other: Self) -> bool:
if isinstance(other, self.__class__):
return self.__items == other.__items
return NotImplemented
def __ne__(self: T, other: T) -> bool:
if isinstance(other, self.__class__):
return self.__items != other.__items
return NotImplemented
def __add__(self, others: Records | Self) -> Self:
return self.__class__(self, *self.__class__(others))
def __radd__(self, others: Records | Self) -> Self:
return self.__class__(self.__class__(others), *self)
[docs]
def __call__(self, mapping: Record = None, **kwargs: Any) -> Self:
"""Update one or more (nested) fields within each item.
Parameters
----------
mapping: dict or str, optional
Dictionary with string keys, JSON string/bytes, or pandas Series.
Defaults to an empty dictionary.
**kwargs:
Can be any value or, for nested structures, again a dictionary with
string keys or a JSON string/bytes or a pandas Series. Keyword
arguments will override values already present in the `mapping`.
Returns
-------
JsonObjects
A new instance of self with updated fields in each item.
Raises
------
ParseError
If the (keyword) arguments cannot be parsed into a dictionary with
string keys.
CastError
If the dictionary values cannot be cast into the types specified in
the schema of the `item_type`.
"""
return self.__class__(item(mapping, **kwargs) for item in self)
@property
def as_json(self) -> list[Json]:
"""JSON-serializable representation."""
return [item.as_json for item in self.__items]
@property
def as_dtype(self) -> str:
"""Representation in the cell of a pandas data frame."""
return self.__str__()
@property
def as_df(self) -> DataFrame:
"""Representation as a pandas data frame."""
data = [item.as_series for item in self]
if data:
columns = None
else:
columns = list(self.__item_type__.__annotations__.keys())
df = DataFrame(data, columns=columns)
df.columns.name = self.__item_type__.__name__
return df.reset_index(drop=True)
@staticmethod
def __class_checked(item_type: type[JsonObject]) -> type[JsonObject]:
"""Allow only JsonObject and JsonObjects as item_type."""
right_type = isinstance(item_type, SchemaMeta)
right_class = issubclass(item_type, JsonObject)
if right_type and right_class:
return item_type
raise SchemaError('item_type must be a subclass of JsonObject!')
@staticmethod
def __parse(items: Records) -> list[Record]:
"""Parse input into a list of something."""
# Define parsers for converting input into a list of items.
parsers = (
lambda x: json.loads(x), # JSON string
lambda x: literal_eval(x), # Some other string
lambda x: x.to_dict(orient='records'), # Dataframe
lambda x: [] if x is None else x # None or some other object
)
# Try parsers one after another
for parse in parsers:
try:
parsed = parse(items)
except (
JSONDecodeError, # json.loads
TypeError, # json.loads and literal_eval
ValueError, # literal_eval
SyntaxError, # literal_eval
AttributeError # Dataframe
):
continue
else:
return parsed
raise ParseError(f'Could not parse {items} as JSON!')
@staticmethod
def __itemize(items: list[Record]) -> list[Json]:
"""Convert list of something into a list of JSONs."""
# Define patterns that convert input to a list of dicts
patterns = (
lambda x: [{**x}], # List of dicts
lambda x: [*x] # List of iterable, possibly a string
)
# First, try list of dicts, then list of anything
for pattern in patterns:
try:
itemized = pattern(items)
except TypeError:
continue
else:
return itemized
raise ParseError(f'Could not parse {items} as JSON!')
def __wrap(self, items: list[Record]) -> list[T]:
"""Cast each item in a list of JSONs to the item schema."""
errors = []
wrapped = []
for item in items:
try:
wrapped.append(self.__item_type__(item))
except (ParseError, CastError, ValidationErrors) as error:
errors.append(error)
if errors:
raise ValidationErrors(self.__class__.__name__, errors)
return wrapped