Notebook Simple Custom Info

import pandas as pd
import numpy as np
import df_file_interchange as fi
from pathlib import Path

# Make things a little easier in terms of syntax
from df_file_interchange.ci.extra.std_extra import FIStdExtraInfo
from df_file_interchange.ci.structured import FIStructuredCustomInfo
from df_file_interchange.ci.unit.currency import FICurrencyUnit
from df_file_interchange.ci.unit.population import FIPopulationUnit
# Build a small demo frame in one expression: a 3x4 block of random normal
# draws labelled a-d, with a nullable-integer "pop" column appended
df = pd.DataFrame(
    data=np.random.randn(3, 4),
    columns=list("abcd"),
).assign(pop=pd.array([1234, 5678, 91011]))

# Define the units used for the columns: four currency units (a description
# plus a multiplier each) and one population unit
unit_cur_a = FICurrencyUnit(unit_multiplier=1_000, unit_desc="USD")
unit_cur_b = FICurrencyUnit(unit_multiplier=1_000, unit_desc="EUR")
unit_cur_c = FICurrencyUnit(unit_multiplier=1_000_000, unit_desc="JPY")
unit_cur_d = FICurrencyUnit(unit_multiplier=1_000, unit_desc="USD")
unit_pop = FIPopulationUnit(unit_multiplier=1, unit_desc="people")

# Extra info: record the author and the source of the data
extra_info = FIStdExtraInfo(source="Potato", author="Spud")

# Bundle the extra info and the per-column units (keyed by column name)
# into a single custom_info object
custom_info = FIStructuredCustomInfo(
    col_units=dict(
        a=unit_cur_a,
        b=unit_cur_b,
        c=unit_cur_c,
        d=unit_cur_d,
        pop=unit_pop,
    ),
    extra_info=extra_info,
)
# Now, let's write the dataframe to file

data_dir = Path("./data/")
# The "/" operator on a Path already yields a Path, so no extra wrapping needed
datafile_csv = data_dir / "tutorial_simple_structured_custom_info.csv"
data_dir.mkdir(exist_ok=True)

# Write to a CSV file (the file format is determined by the extension of
# datafile_csv); the call returns the path of the accompanying YAML metafile
metafile_yaml = fi.write_df_to_file(df, datafile_csv, custom_info=custom_info)
metafile_yaml
PosixPath('data/tutorial_simple_structured_custom_info.yaml')
# Load the data again via the YAML metafile: read_df hands back a pair of
# (dataframe, metainfo), unpacked here into df_reload and metainfo_reload
df_reload, metainfo_reload = fi.read_df(metafile_yaml)
# Demo-only sanity check that the round trip reproduced the original frame
fi.chk_strict_frames_eq_ignore_nan(df, df_reload)
True
# Let's have a look at the custom info we read from the file; it is exposed
# as the custom_info attribute of the reloaded metainfo object
metainfo_reload.custom_info
FIStructuredCustomInfo(unstructured_data={}, extra_info=FIStdExtraInfo(author='Spud', source='Potato', description=None, processed_date=None, processed_by=None, classname='FIStdExtraInfo'), col_units={'a': FICurrencyUnit(unit_desc='USD', unit_multiplier=1000.0, unit_year=None, unit_year_method=None, unit_date=None, classname='FICurrencyUnit'), 'b': FICurrencyUnit(unit_desc='EUR', unit_multiplier=1000.0, unit_year=None, unit_year_method=None, unit_date=None, classname='FICurrencyUnit'), 'c': FICurrencyUnit(unit_desc='JPY', unit_multiplier=1000000.0, unit_year=None, unit_year_method=None, unit_date=None, classname='FICurrencyUnit'), 'd': FICurrencyUnit(unit_desc='USD', unit_multiplier=1000.0, unit_year=None, unit_year_method=None, unit_date=None, classname='FICurrencyUnit'), 'pop': FIPopulationUnit(unit_desc='people', unit_multiplier=1, classname='FIPopulationUnit')}, classname='FIStructuredCustomInfo')
# The custom info must always be able to serialize itself, so we can dump it
# to a plain nested dict with model_dump()
metainfo_reload.custom_info.model_dump()
{'unstructured_data': {},
 'extra_info': {'author': 'Spud',
  'source': 'Potato',
  'description': None,
  'processed_date': None,
  'processed_by': None,
  'classname': 'FIStdExtraInfo'},
 'col_units': {'a': {'unit_desc': 'USD',
   'unit_multiplier': 1000.0,
   'unit_year': None,
   'unit_year_method': None,
   'unit_date': None,
   'classname': 'FICurrencyUnit'},
  'b': {'unit_desc': 'EUR',
   'unit_multiplier': 1000.0,
   'unit_year': None,
   'unit_year_method': None,
   'unit_date': None,
   'classname': 'FICurrencyUnit'},
  'c': {'unit_desc': 'JPY',
   'unit_multiplier': 1000000.0,
   'unit_year': None,
   'unit_year_method': None,
   'unit_date': None,
   'classname': 'FICurrencyUnit'},
  'd': {'unit_desc': 'USD',
   'unit_multiplier': 1000.0,
   'unit_year': None,
   'unit_year_method': None,
   'unit_date': None,
   'classname': 'FICurrencyUnit'},
  'pop': {'unit_desc': 'people',
   'unit_multiplier': 1,
   'classname': 'FIPopulationUnit'}},
 'classname': 'FIStructuredCustomInfo'}