# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: MIT

import logging
import re
from abc import abstractmethod, ABC
from dataclasses import dataclass
from functools import reduce
from pathlib import Path
from typing import List, Dict, Any, Union

import numpy as np
import pandas as pd

from mpp.core.devices import Device, DeviceType
from mpp.core.types import ConfigurationPaths
from mpp.core.api_args import SystemInformation


@dataclass(frozen=True)
class UnitNames:
    """
    Names of the topology units as they appear in a specific input format.

    The values are matched against the header row of the processor mapping table
    (see `SystemInformationParser._set_processor_maps`).
    """
    # Column holding the OS processor number; used as the key of every topology map
    OS_PROCESSOR: str
    # Column holding the socket (package) number
    SOCKET: str
    # Column holding the module number; when the column is absent every row gets module '0'
    MODULE: str
    # Column holding the core type; replaced by 'core' when absent or when any value is 'invalid'
    CORE_TYPE: str
    # Column holding the physical core number
    CORE: str
    # Column holding the hardware thread number
    THREAD: str
    # Optional column holding the die number
    DIE: str = ''
    # Optional column that is kept as text (not converted to int) together with CORE_TYPE
    CORE_ARCH: str = ''
    # Key under which populated memory channels are counted in `uncore_units`
    MEMORY_CHANNELS: str = 'channels_populated'


@dataclass(frozen=True)
class SectionNames:
    """
    Prefixes of the section header lines recognized by the parser.

    A line that starts with one of these values switches the parser into the matching state.
    A value of None (or an empty string) means the section is never detected.
    """
    # Header of the per-core-type processor features section
    PROCESSOR_FEATURES_SECTION: str
    # Header of the OS processor <-> physical/logical mapping table
    PROCESSOR_MAP_SECTION: str
    # The remaining sections are optional for a given input format
    SYSTEM_FEATURES_SECTION: Union[str, None] = None
    UNCORE_UNITS_SECTION: Union[str, None] = None
    EDP_CONFIGURATION_SECTION: Union[str, None] = None
    RDT_SECTION: Union[str, None] = None
    GPU_SECTION: Union[str, None] = None
    RAM_FEATURES_SECTION: Union[str, None] = None
    QPI_FEATURES_SECTION: Union[str, None] = None
    IIO_FEATURES_SECTION: Union[str, None] = None
    PCIE_UNITS_SECTION: Union[str, None] = None


@dataclass(frozen=True)
class SymbolNames:
    """
    Names of the feature entries that `SymbolTable` is configured with.

    `SymbolTable` defaults to 'Number of Packages', 'Threads Per Core', 'Cores Per Package' and
    'Threads Per Package' (in field order).
    """
    # Presumably the feature holding the socket count - verify against SymbolTable usage
    NUM_SOCKETS: str
    THREADS_PER_CORE: str
    CORES_PER_SOCKET: str
    # Optional; not every input format reports it
    THREADS_PER_SOCKET: Union[str, None] = None


@dataclass(frozen=True)
class SystemInfoParserAttributes:
    """
    Format-specific settings consumed by `SystemInformationParser` through its
    `parser_attributes` class attribute.
    """
    # A line starting with this text ends system information parsing
    first_sample_indicator: str
    # Separator between the cells of a processor mapping table row
    column_separator: str
    # Column/unit names of the processor mapping table
    units: UnitNames
    # Section header prefixes
    sections: SectionNames
    # Feature names used for the symbol table
    symbols: SymbolNames


class SystemInformationParser:
    """
    Parse system information stored in input data files
    """

    # Matches any run of digits (optionally containing thousands separators); used by
    # `_adjust_type` to decide whether an int conversion should be attempted
    INT_LIKE_RE = re.compile(r'\d+[\d,]*')

    # Format-specific settings; None here, so the states that read it need a value to be
    # supplied (presumably by format-specific subclasses - verify)
    parser_attributes: Union[None, SystemInfoParserAttributes] = None

    def __init__(self, input_file: Path, ref_tsc_hz: int = 0):
        """
        Initialize the system information parser

        Parses `input_file` immediately, finalizes the reference TSC value, builds the
        `api_args` object and registers the detected core types as valid device names.

        :param input_file: the data file to parse
        :param ref_tsc_hz: an optional system frequency value (in Hz). Overrides system information in the input file
                            (if such information exists)

        """
        self.__input_file = input_file
        self.__socket_map = {}
        self.__core_map = {}
        self.__thread_map = {}
        self.__core_type_map = {}
        self.__unique_core_types = []
        # Must exist even when the input has no (valid) processor map, otherwise reading the
        # `unique_os_processors` property raises AttributeError
        self.__unique_os_processors = []
        self.__module_map = {}
        self.__die_map = {}
        self.__has_modules = False
        self.__has_die = False
        self._ref_tsc = 0
        self.__parser_state = None
        self.attributes: Dict[str, Any] = {}
        self.system_features: Dict[str, Any] = {}
        self.__current_core_type: str = 'core'
        self.processor_features: Dict[str, Dict[str, Any]] = {}
        self.uncore_units: Dict[str, Any] = {}
        self.configuration_file_paths: Dict[str, ConfigurationPaths] = {}
        # PCIe id -> unit number -> package number (None if no package line preceded the unit)
        self.pcie_units: Dict[str, Dict[int, Union[int, None]]] = {}
        self.rdt: Dict = {}
        self.__parse()
        self.__finalize_attributes(ref_tsc_hz)
        self.api_args = SystemInformation(self.processor_features, self.system_features, self.uncore_units,
                                          self.ref_tsc, self.unique_core_types, self.has_modules)
        Device.set_valid_device_names(self.unique_core_types)

    @property
    def socket_map(self) -> Dict[int, int]:
        """
        :return: a dict mapping a logical core number to its socket number.
                 Empty until a processor mapping table was parsed successfully
        """
        return self.__socket_map

    @property
    def core_map(self) -> Dict[int, int]:
        """
        :return: a dict mapping a logical core number to its physical core number.
                 Empty until a processor mapping table was parsed successfully
        """
        return self.__core_map

    @property
    def thread_map(self) -> Dict[int, int]:
        """
        :return: a dict mapping a logical core number to its hardware thread number.
                 Empty until a processor mapping table was parsed successfully
        """
        return self.__thread_map

    @property
    def core_type_map(self) -> Dict[int, str]:
        """
        :return: a dict mapping an OS Processor number to its (lower-cased) core type name
                 for hybrid architectures
        """
        return self.__core_type_map

    @property
    def unique_core_types(self) -> List[str]:
        """
        :return: a list of unique core types found in the processor mapping table
                 (empty if no table was parsed successfully)
        """
        return self.__unique_core_types

    @property
    def unique_os_processors(self) -> List[int]:
        """
        :return: a list of unique OS Processors, taken from the keys of the topology (core) map
        """
        return self.__unique_os_processors

    @property
    def module_map(self) -> Dict[int, int]:
        """
        :return: a dict mapping a logical core number to its module. When the input has no
                 module column every processor is mapped to module 0
        """
        return self.__module_map

    @property
    def die_map(self) -> Dict[int, int]:
        """
        :return: a dict mapping a logical core number to its CBB (Core Building Block).
                 Empty when the processor mapping table has no die column
        """
        return self.__die_map

    @property
    def has_die(self) -> bool:
        """
        :return: a bool specifying if the platform has die information, i.e. the processor
                 mapping table contains the die column
        """
        return self.__has_die

    @property
    def has_modules(self) -> bool:
        """
        :return: a bool specifying if the platform has modules, i.e. the processor mapping
                 table contains the module column
        """
        return self.__has_modules

    @property
    def ref_tsc(self):
        """
        :return: the system frequency in Hz. This is the `ref_tsc_hz` constructor argument when
                 it is positive, otherwise whatever `_set_ref_tsc` stored (0 by default)
        """
        return self._ref_tsc

    @property
    def current_core_type(self) -> str:
        """
        :return: the core type that parsed processor features are currently attributed to
                 ('core' until a processor features section selects another one)
        """
        return self.__current_core_type

    def _set_current_core_type(self, core_type: str) -> None:
        """
        Select the core type that subsequently parsed processor features belong to

        :param core_type: the core type name
        """
        self.__current_core_type = core_type

    @staticmethod
    def _get_processor_core_type(line: str) -> str:
        line = line.strip()
        core_type_re = re.compile(r'\((.*?)\)')
        match = re.search(core_type_re, line)
        return match.group(1) if match else DeviceType.CORE

    def _add_processor_core_type(self, core_type: str) -> None:
        """
        Start an empty feature dict for a core type

        Any features previously stored for the same core type are discarded.

        :param core_type: the core type name
        """
        self.processor_features[core_type] = {}

    def __parse(self):
        self._set_parser_state(SystemInformationParser._DefaultState())
        with open(self.__input_file, 'r') as f:
            try:
                for _, line in enumerate(f):
                    self.__parser_state.process(self, line.strip())
            except StopIteration:
                pass

    def _set_parser_state(self, new_state: '_State') -> None:
        """
        Switch the state object that processes the following input lines

        :param new_state: the state to activate
        """
        self.__parser_state = new_state

    def _adjust_type(self, value: str):
        try:
            if re.search(self.INT_LIKE_RE, value):
                # value looks like an int. Try to convert
                return int(value.replace(',', ''))

            if value.lower() in ['yes', 'enabled']:
                return True

            if value.lower() in ['no', 'disabled']:
                return False

            # Unable to determine the type of the value, return as is
            return value
        except ValueError:
            return str(value)

    class _State:
        """
        Base class of the parser states. Each state consumes one input line at a time
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Handle one (stripped) input line. The base implementation ignores the line

            :param context: the parser that owns this state
            :param line: the line to process
            """
            return None

        @staticmethod
        def _get_attribute(context, line):
            """
            Store a `key: value` line in `context.attributes`

            The line is split at the first colon only; the value is type-adjusted.

            :param context: the parser to update
            :param line: a line containing at least one colon
            :return: the (updated) context
            """
            name, raw_value = line.split(':', 1)
            parsed_value = context._adjust_type(raw_value.strip())
            context.attributes[name.strip()] = parsed_value
            return context

    class _DefaultState(_State):
        """
        State used outside of any section: detects section headers (and switches to the matching
        state) and collects free-standing `key : value` / `key ..... value` attributes
        """
        DOT_SEPARATED_RE = re.compile(r'^(?P<name>[^/.]+)\.+(?P<value>[^/.][\s\S]+)$')

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Process a line that is not part of a known section

            The checks are made in a fixed order; the first one that applies wins.

            :param context: the parser that owns this state
            :param line: the (stripped) line to process
            """
            if self.__skip_line(line):
                return

            attrs = context.parser_attributes
            if line.startswith(attrs.first_sample_indicator):
                # Sample data starts here: nothing more to parse
                context._set_parser_state(context._FinalState())
                return

            sections = attrs.sections
            if self._is_section(sections.PROCESSOR_MAP_SECTION, line):
                context._set_parser_state(context._ProcessorMappingState())
                return
            if self._is_section(sections.SYSTEM_FEATURES_SECTION, line):
                context._set_parser_state(context._SystemFeaturesState())
                return
            if self._is_section(sections.PROCESSOR_FEATURES_SECTION, line):
                # The header names the core type the following features belong to
                self.__add_core_type(context, line)
                context._set_parser_state(context._ProcessorFeaturesState())
                return
            if self._is_section(sections.UNCORE_UNITS_SECTION, line):
                context._set_parser_state(context._UncoreUnitsState())
                return
            if self._is_section(sections.EDP_CONFIGURATION_SECTION, line):
                context._set_parser_state(context._EdpConfigurationState())
                return
            if self._is_section(sections.PCIE_UNITS_SECTION, line):
                # The PCIe state reads its id from the header line itself
                context._set_parser_state(context._PCIEUnitsState(context, line))
                return
            if self._is_section(sections.RDT_SECTION, line):
                context._set_parser_state(context._RdtSupportState())
                return
            if self._is_section(sections.GPU_SECTION, line):
                context._set_parser_state(context._GpuInformationState())
                return
            if self._is_section(sections.RAM_FEATURES_SECTION, line):
                context._set_parser_state(context._RamFeaturesState())
                return
            if self._is_section(sections.QPI_FEATURES_SECTION, line):
                context._set_parser_state(context._QpiFeaturesState())
                return
            if self._is_section(sections.IIO_FEATURES_SECTION, line):
                context._set_parser_state(context._IioFeaturesState())
                return

            if ':' in line:
                # Parse system information with the following format:
                #   key : value
                #
                # Example file section:
                # ...
                # NUMA node(s):                    2
                # NUMA node0 CPU(s):               0-31,64-95
                # NUMA node1 CPU(s):               32-63,96-127
                # ...
                self._get_attribute(context, line)
                return

            # Parse system information with the following format:
            #   key ......... value
            #
            # Example file section:
            # ...
            # Device Type ............... Intel(R) Xeon(R) Processor code named Icelake
            # EMON Database ............. icelake_server
            # Platform type ............. 125
            # ...
            dotted = self.DOT_SEPARATED_RE.search(line)
            if dotted:
                fields = dotted.groupdict()
                context.attributes[fields['name'].strip()] = context._adjust_type(fields['value'].strip())

        def _is_section(self, section, line):
            """
            :return: a truthy value if `section` is configured and `line` starts with it
            """
            if not section:
                return section
            return line.startswith(section)

        def __add_core_type(self, context: 'SystemInformationParser', line: str):
            """
            Register the (lower-cased) core type named in a processor features header and make
            it the current core type
            """
            name = context._get_processor_core_type(line).lower()
            context._add_processor_core_type(name)
            context._set_current_core_type(name)

        def __skip_line(self, line: str) -> bool:
            """
            :return: True for lines that carry no system information
            """
            return line.startswith(('Copyright', 'Application Build Date'))

    class _FinalState(_State):
        """
        Terminal state, entered when the first sample indicator is seen
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Abort line iteration: the exception is caught by the parser's read loop
            """
            raise StopIteration()

    class _ProcessorMappingState(_State):
        """
        Collects the rows of the processor mapping table that is enclosed by two separator lines
        """
        MAP_TABLE_SEPARATOR = '-----------------------------------------'

        def __init__(self):
            # First collected row is the table header, the remaining rows are data
            self.__map_values: List[List[str]] = []
            self._is_table_start = True

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "Processor Mapping" section and update system attributes

            The first separator line opens the table, the second one closes it: the collected
            rows are then handed to `context._set_processor_maps` and the parser returns to the
            default state. Every other line is split with the configured column separator; empty
            cells are dropped and lines without any cell (e.g. blank lines) are ignored so that
            they cannot end up as the table header or as an empty data row.

            Example file section (cells are separated by the column separator): ::
                ...
                OS Processor <-> Physical/Logical Mapping
                -----------------------------------------
                OS Processor   Phys. Package   Core   Logical Processor   Core Type   Module
                     0               0           0            0            bigcore      2
                     1               0           0            0            smallcore    0
                     2               0           1            0            smallcore    0
                     ...
                     18              0           0            0            bigcore      7
                     19              0           0            1            bigcore      7
                -----------------------------------------

            :param context: the parser that owns this state
            :param line: the (stripped) line to process
            """
            if line == self.MAP_TABLE_SEPARATOR:
                if self._is_table_start:
                    self._is_table_start = False
                else:
                    context._set_processor_maps(self.__map_values)
                    context._set_parser_state(context._DefaultState())
                return

            separator = context.parser_attributes.column_separator
            cells = [cell for cell in (part.strip() for part in line.split(separator)) if cell]
            if cells:
                self.__map_values.append(cells)

    class _FeatureState(ABC, _State):
        """
        Base class for sections made of parenthesized features, either
        `(name) (value)` or `(name: value)`. The section ends at the first empty line
        """
        BOOL_VAL_RE = re.compile(r'^\s*\((?P<name>[\s\S]+)\)\s+\((?P<value>[\s\S]+)\)$')
        NUMERIC_VAL_RE = re.compile(r'^\s*\((?P<name>[\s\S]+):\s*(?P<value>[\s\S]+)\)$')
        VAL_RE = [BOOL_VAL_RE, NUMERIC_VAL_RE]

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Store the feature found on `line`, unless the line ends the section
            """
            if self.done(context, line):
                return
            self.add_feature(context, line)

        @staticmethod
        def done(context: 'SystemInformationParser', line: str):
            """
            :return: True (after switching back to the default state) when `line` is empty
            """
            if line:
                return False
            context._set_parser_state(SystemInformationParser._DefaultState())
            return True

        def add_feature(self, context: 'SystemInformationParser', line: str):
            """
            Match `line` against the known feature patterns (first match wins) and pass the
            stripped name and the type-adjusted value to `add_feature_to_context`.
            Lines matching no pattern are ignored
            """
            for pattern in context._FeatureState.VAL_RE:
                found = re.search(pattern, line)
                if not found:
                    continue
                fields = found.groupdict()
                feature_name = fields['name'].strip()
                feature_value = context._adjust_type(fields['value'].strip())
                self.add_feature_to_context(context, feature_name, feature_value)
                return

        @abstractmethod
        def add_feature_to_context(self, context: 'SystemInformationParser', name: str, value):
            """
            Store a parsed feature in the appropriate container of `context`
            """
            pass

    class _SystemFeaturesState(_FeatureState):
        """
        Parses the "System Features" section and update system attributes
        Example file section: ::
            ...
            System Features:
                (Number of Packages:    1)
                (Cores Per Package:    16)
                (Threads Per Package:  24)
            ...
        """

        def add_feature_to_context(self, context: 'SystemInformationParser', name: str, value):
            """
            Store the feature in `context.system_features`, replacing an earlier value of the same name
            """
            context.system_features[name] = value

    class _ProcessorFeaturesState(_FeatureState):
        """
        Parses the "Processor Features" section and update system attributes
        Example file section: ::
            ...
            Processor Features:
                (Thermal Throttling) (Enabled)
                (Hyper-Threading) (Enabled)
                (MLC Streamer Prefetching) (Enabled)
                (MLC Spatial Prefetching) (Enabled)
                (DCU Streamer Prefetching) (Enabled)
                (DCU IP Prefetching) (Enabled)
                (Cores Per Package:   22)
                (Threads Per Package: 44)
                (Threads Per Core:    2)
            ...
        """

        def add_feature_to_context(self, context: 'SystemInformationParser', name: str, value):
            """
            Store the feature under the core type that is current for the context
            """
            features_of_core_type = context.processor_features[context.current_core_type]
            features_of_core_type[name] = value

    class _UncoreUnitsState(_State):
        """
        Collects the unit counts of the uncore performance monitoring units section
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "Uncore Performance Monitoring Units" section and update system attributes

            Each `unit : count` line is stored in `context.uncore_units`; legacy unit names are
            replaced by their current names. An empty line ends the section.

            Example file section: ::
                ...
                Uncore Performance Monitoring Units:
                    cha             : 32
                    imc             : 8
                    m2m             : 4
                    qpi             : 3
                    r3qpi           : 3
                    iio             : 6
                    irp             : 6
                    pcu             : 1
                    ubox            : 1
                    m2pcie          : 6
                    rdt             : 1
                ...
            """
            if not line:
                # Done with this section
                context._set_parser_state(SystemInformationParser._DefaultState())
                return

            fields = line.split(':')
            if len(fields) < 2:
                # Not a "unit : count" line
                return
            unit_name = self.__get_current_unit_name(fields[0].strip())
            context.uncore_units[unit_name] = context._adjust_type(fields[1].strip())

        @staticmethod
        def __get_current_unit_name(unit):
            """
            :return: the current name of a unit that may be given by its legacy name
            """
            # legacy unit name 'qpi' -> new unit name 'upi'
            return 'upi' if unit == 'qpi' else unit

    class _EdpConfigurationState(_State):
        """
        Collects the hybrid platform flag and the metric/chart configuration file paths
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parse the "EDP Configuration" section and update system attributes

            An empty line ends the section.

            Example file section: ::
                ...
                EDP Configuration:
                EDP events file:/opt/intel/sep_private_5.45_linux_02082134e11d103/bin64/../lib64/../config/edp/icelake_server_events_private.txt
                Hybrid Platform: No
                EDP metric file:icelake_server_private.xml
                EDP chart file:chart_format_icelake_server_private.txt

                ...
            """
            if not line:
                # Done with this section
                context._set_parser_state(SystemInformationParser._DefaultState())
                return

            context = self._set_hybrid_platform_attribute(context, line)
            self._set_metric_file_path(context, line)
            self._set_chart_file_path(context, line)

        def _set_metric_file_path(self, context, line):
            """Store the metric file path if `line` is a metric file line"""
            self._set_configuration_file_path(context, line, 'metric')

        def _set_chart_file_path(self, context, line):
            """Store the chart file path if `line` is a chart file line"""
            self._set_configuration_file_path(context, line, 'chart')

        def _set_hybrid_platform_attribute(self, context, line):
            """
            Store the 'Hybrid Platform' attribute if `line` provides it

            :return: the context
            """
            if line.startswith('Hybrid Platform:'):
                context = self._get_attribute(context, line)
            return context

        def _set_configuration_file_path(self, context, line, configuration):
            """
            Store the path given on an `EDP [<core type> ]<configuration> file:<path>` line

            :param context: the parser to update
            :param line: the line to inspect
            :param configuration: the configuration kind, e.g. 'metric' or 'chart'
            """
            core_type_label = self.__set_core_type_label(context, line)
            expected_start = self.__get_configuration_line_start(configuration, core_type_label)
            if not line.startswith(expected_start):
                return

            core_type = self.__get_core_type_from_label(core_type_label)
            paths = context.configuration_file_paths.setdefault(core_type, {})
            # Split at the first colon only: the path itself may contain colons
            # (e.g. a Windows drive letter) and must not be dropped because of that
            _, separator, raw_path = line.partition(':')
            if separator:
                paths[f'{configuration} {ConfigurationPaths.PATH}'] = Path(raw_path.strip())

        @staticmethod
        def __get_configuration_line_start(configuration, core_type_label):
            """
            :return: the text a configuration file line is expected to start with
            """
            return f'EDP {core_type_label}{configuration} file'

        @staticmethod
        def __get_core_type_from_label(core_type_label):
            """
            :return: 'core' for an empty label, otherwise the label without surrounding whitespace
            """
            return 'core' if not core_type_label else core_type_label.strip()

        @staticmethod
        def __set_core_type_label(context, line):
            """
            :return: the second word of `line` followed by a space when the 'Hybrid Platform'
                     attribute is set and truthy; an empty string otherwise (also when the line
                     has fewer than two words)
            """
            if not context.attributes.get('Hybrid Platform'):
                return ''
            words = line.split()
            return words[1] + ' ' if len(words) > 1 else ''


    class _PCIEUnitsState(_State):
        """
        Collects, for one PCIe unit features section, the package each unit belongs to
        """
        PCIE_RE = re.compile(r'PCIeX(8|16|32)')
        PACKAGE_RE = re.compile(r'Package (\d+)')
        UNIT_RE = re.compile(r'Unit (\d+)')

        def __init__(self, context: 'SystemInformationParser', line):
            """
            :param context: the parser that owns this state
            :param line: the section header line; it must contain text matching `PCIE_RE`,
                         which (upper-cased) becomes the key of this section in `context.pcie_units`
            """
            header_id = self.__get_id(line, self.PCIE_RE)
            self.__pcie_id = header_id.upper()
            context.pcie_units[self.__pcie_id] = {}
            self.__package_id = None

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "PCIeX_ Unit Features" section and update system attributes

            A `Package N` line selects the package of the units that follow. For every unit
            number the package of its first occurrence is recorded. An empty line ends the
            section.

            Example of the section body: ::
                ...
                    Package 0 :
                        Unit 0 : rp4 : domain:0 bus:1d device:3 function:0
                        Unit 0 : rp5 : domain:0 bus:1d device:5 function:0
                        Unit 0 : rp6 : domain:0 bus:1d device:7 function:0
                        Unit 0 : rp7 : domain:0 bus:1d device:9 function:0
                    Package 1 :
                        Unit 1 : rp4 : domain:0 bus:1e device:3 function:0
                        Unit 2 : rp5 : domain:0 bus:23 device:5 function:0
                        Unit 2 : rp7 : domain:0 bus:23 device:9 function:0

                ...
            """
            if self.__done(context, line):
                return

            package_text = self.__get_id(line, self.PACKAGE_RE, 1)
            if package_text:
                self.__package_id = int(package_text)
                return

            unit_text = self.__get_id(line, self.UNIT_RE, 1)
            if not unit_text:
                return
            # Keep the package of the first occurrence of a unit
            context.pcie_units[self.__pcie_id].setdefault(int(unit_text), self.__package_id)

        @staticmethod
        def __done(context: 'SystemInformationParser', line: str):
            """
            :return: True (after switching back to the default state) when `line` is empty
            """
            if line:
                return False
            context._set_parser_state(SystemInformationParser._DefaultState())
            return True

        @staticmethod
        def __get_id(line: str, expression, group=0) -> Union[str, None]:
            """
            :return: the requested group of the first match of `expression` in `line`,
                     or None when there is no match
            """
            found = re.search(expression, line)
            return found.group(group) if found else None

    class _RdtSupportState(_State):
        """
        Collects the entries of the RDT hardware support section
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "RDT H/W Support" section and update system attributes

            Each `name : value` line is stored (type-adjusted) in `context.rdt`.
            An empty line ends the section.

            Example file section: ::
                ...
                RDT H/W Support:
                    L3 Cache Occupancy      : Yes
                    Total Memory Bandwidth  : Yes
                    Local Memory Bandwidth  : Yes
                    L3 Cache Allocation     : Yes
                    L2 Cache Allocation     : No
                    Highest Available RMID  : 255
                    Sample Multiplier       : 65536
                ...
            """
            if not line:
                # Done with this section
                context._set_parser_state(SystemInformationParser._DefaultState())
                return

            fields = line.split(':')
            if len(fields) < 2:
                # Not a "name : value" line
                return
            context.rdt[fields[0].strip()] = context._adjust_type(fields[1].strip())

    class _GpuInformationState(_State):
        """
        Skips the GPU information section
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "GPU Information" section and update system attributes

            The section content is currently ignored; an empty line ends the section.

            Example file section: ::
                ...
                GPU Information:

                TBD...
                ...
            """
            if line:
                # TODO: parse GPU Information section
                return

            # Done with this section
            context._set_parser_state(SystemInformationParser._DefaultState())

    class _QpiFeaturesState(_State):
        """
        Skips the QPI link features section
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "QPI Link Features" section and update system attributes

            The section content is currently ignored; an empty line ends the section.

            Example file section: ::
                ...
                QPI Link Features:
                    Package 0 :
                    Package 1 :
                ...
            """
            if line:
                # TODO: parse QPI Link Features section
                return

            # Done with this section
            context._set_parser_state(SystemInformationParser._DefaultState())

    class _IioFeaturesState(_State):
        """
        Skips the IIO unit features section
        """

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "IIO Unit Features" section and update system attributes

            The section content is currently ignored; an empty line ends the section.

            Example file section: ::
                ...
                IIO Unit Features:
                    Package 0 :
                        domain:0 bus:0x00 stack:0 mesh: 0
                        domain:0 bus:0x00 stack:0 mesh: 0
                        domain:0 bus:0x00 stack:0 mesh: 0
                    Package 1 :
                        domain:0 bus:0x00 stack:0 mesh: 0
                        domain:0 bus:0x00 stack:0 mesh: 0
                        domain:0 bus:0x00 stack:0 mesh: 0
                ...
            """
            if line:
                # TODO: parse IIO Unit Features section
                return

            # Done with this section
            context._set_parser_state(SystemInformationParser._DefaultState())

    class _RamFeaturesState(_State):
        """
        Counts the memory channels that have at least one populated DIMM
        """
        DIMM_LOCATION_RE = re.compile(r'\((\d+)/(\d+)/(\d+)\)')
        DIMM_INFO_NOT_EMPTY_RE = re.compile(r'\(dimm(?P<id>\d+) info:\s*(?!\s*Empty)(?P<value>.*)\)',
                                            flags=re.IGNORECASE)

        def __init__(self):
            # True between a channel line and the first populated DIMM found on that channel
            self.__new_channel = False
            # Distinct DIMM descriptions seen on counted channels
            self.__dimm_types = set()

        def process(self, context: 'SystemInformationParser', line: str):
            """
            Parses the "RAM Features" section and counts the populated memory channels in
            `context.uncore_units` (under the configured MEMORY_CHANNELS name)

            An empty line ends the section.

            Example file section: ::
                ...
                RAM Features:
                    (Package/Memory Controller/Channel)
                    (0/0/0) (Total Number of Ranks on this Channel: 2)
                         (Dimm0 Info: Empty)
                         (Dimm1 Info: Empty)
                    (0/0/1) (Total Number of Ranks on this Channel: 2)
                         (Dimm0 Info: Capacity = 32, # of devices = 32, Device Config = 8Gb(2048Mbx4))
                         (Dimm1 Info: Capacity = 32, # of devices = 32, Device Config = 8Gb(2048Mbx4))
                ...
            """
            if not line:
                # Done with this section
                context._set_parser_state(SystemInformationParser._DefaultState())
                return

            if self._detect_new_channel(line):
                return
            self._increment_memory_channel(context, line)

        def _detect_new_channel(self, line):
            """
            :return: True if `line` contains a `(package/controller/channel)` location, i.e.
                     starts the description of a new channel; False otherwise
            """
            if re.search(self.DIMM_LOCATION_RE, line):
                # outer layer active
                self.__new_channel = True
                return True
            return False

        def _increment_memory_channel(self, context, line):
            """
            Count the current channel as populated on its first non-empty DIMM line

            Further populated DIMMs of the same channel do not increase the count.
            """
            match = re.search(self.DIMM_INFO_NOT_EMPTY_RE, line)
            if not (match and self.__new_channel):
                return

            # inner layer active
            self.__new_channel = False
            # add(), not update(): update() would insert the individual characters of the text
            self.__dimm_types.add(match.group('value'))
            channels_key = context.parser_attributes.units.MEMORY_CHANNELS
            context.uncore_units[channels_key] = context.uncore_units.get(channels_key, 0) + 1

    def _set_processor_maps(self, map_values: List[List[str]]):
        df = pd.DataFrame(map_values[1:], columns=map_values[0])
        if self.parser_attributes.units.CORE_TYPE not in df.columns or \
                any(df[self.parser_attributes.units.CORE_TYPE].unique() == 'invalid'):
            df[self.parser_attributes.units.CORE_TYPE] = ['core'] * len(df.index)

        if self.parser_attributes.units.MODULE not in df.columns:
            df[self.parser_attributes.units.MODULE] = ['0'] * len(df.index)
        else:
            self.__has_modules = True

        if self.parser_attributes.units.DIE in df.columns:
            df[self.parser_attributes.units.DIE] = df[self.parser_attributes.units.DIE].astype(int)
            self.__has_die = True

        try:
            df = self._get_processor_map_df(df)
            self.__socket_map = dict(zip(df[self.parser_attributes.units.OS_PROCESSOR].values,
                                         df[self.parser_attributes.units.SOCKET].values))
            self.__core_map = dict(zip(df[self.parser_attributes.units.OS_PROCESSOR].values,
                                       df[self.parser_attributes.units.CORE].values))
            self.__thread_map = dict(zip(df[self.parser_attributes.units.OS_PROCESSOR].values,
                                         df[self.parser_attributes.units.THREAD].values))
            self.__core_type_map = dict(zip(df[self.parser_attributes.units.OS_PROCESSOR].values,
                                            df[self.parser_attributes.units.CORE_TYPE].str.lower().values))
            self.__module_map = dict(zip(df[self.parser_attributes.units.OS_PROCESSOR].values,
                                         df[self.parser_attributes.units.MODULE].values))
            if self.parser_attributes.units.DIE in df.columns:
                self.__die_map = dict(zip(df[self.parser_attributes.units.OS_PROCESSOR].values,
                                      df[self.parser_attributes.units.DIE].values))
            self.__unique_core_types = list(set(self.core_type_map.values()))
            self.__unique_os_processors = list(self.core_map.keys())

        except ValueError as e:
            # TODO: log an error
            # Swallow exception so that the parser doesn't crash
            pass

    def _get_processor_map_df(self, df):
        str_columns = [self.parser_attributes.units.CORE_TYPE, self.parser_attributes.units.CORE_ARCH]
        numeric_columns = list(set(df.columns) - set(str_columns))
        numeric_columns = list(filter(lambda col: col in [getattr(self.parser_attributes.units, f) for f in
                                                          self.parser_attributes.units.__dataclass_fields__],
                                      numeric_columns))
        df[numeric_columns] = df[numeric_columns].astype(int)
        return df

    @abstractmethod
    def _set_ref_tsc(self):
        """
        Hook called by `__finalize_attributes` when no positive `ref_tsc_hz` override was given

        Presumably implementations assign `self._ref_tsc` from the parsed data - verify against
        the concrete parsers. The base implementation does nothing.
        NOTE(review): this class does not list ABC among its bases, so `@abstractmethod` is
        likely not enforced at instantiation - confirm.
        """
        pass

    def __finalize_attributes(self, ref_tsc_hz):
        if ref_tsc_hz > 0:
            self._ref_tsc = ref_tsc_hz
        else:
            self._set_ref_tsc()

    @abstractmethod
    def _set_qpi_link_speed(self):
        """
        Hook for determining the QPI link speed; the base implementation does nothing

        NOTE(review): nothing in the visible part of this class calls this method - confirm
        where (and whether) it is invoked.
        """
        pass

class SymbolTable:
    """
    Create symbol table in `MetricComputer` symbol format (Dict[str, Any])

    The table is assembled from the processor features, system features, uncore unit counts,
    reference TSC value and optional QPI link speed carried by a `SystemInformation` object.
    """
    PER_SOCKET_POSTFIX = 's_per_socket'
    UNCORE_UNIT_STRINGS = ['cha', 'upi', 'iio']
    RAM_FEATURES = [UnitNames.MEMORY_CHANNELS]

    def __init__(self,
                 system_info: SystemInformation,
                 symbols: SymbolNames = SymbolNames('Number of Packages', 'Threads Per Core', 'Cores Per Package',
                                                    'Threads Per Package')):
        """
        Capture the parts of the system information needed to build symbol tables

        :param system_info: parsed system information
        :param symbols: names of the feature entries holding the socket, core and thread counts
        """
        self.__processor_features = system_info.processor_features
        self.__system_features = system_info.system_features
        self.__uncore_units = system_info.uncore_units
        self.__symbols = symbols
        self.__unique_core_types = system_info.unique_core_types
        self.__ref_tsc = system_info.ref_tsc
        # Hybrid means more than one core type. This must not be derived from the number of processor
        # feature sets: the flag is only read when there is exactly one such set, so a test on that
        # count could never be true there and the warning in `_validate_core_type` would be unreachable.
        self.__is_hybrid = len(self.__unique_core_types or ()) > 1
        self.__qpi_link_speed = system_info.qpi_link_speed

    @property
    def processor_features(self):
        """
        :return: the processor features taken from the system information, keyed by core type
        """
        return self.__processor_features

    def get_symbol_table(self, core_type=DeviceType.CORE) -> Dict[str, Any]:
        """
        :param core_type: the core type whose processor features should be used
        :return: the requested (core_type) symbol table for `MetricComputer` from EMOM system information
        :raises ValueError: if no processor features can be associated with `core_type`
        """
        core_type = self._validate_core_type(core_type)
        socket_count = self._get_socket_count(core_type)
        symbol_table = self._get_system_symbols(socket_count)
        symbol_table.update(self._get_uncore_symbols())
        symbol_table.update(self._get_ram_symbols(socket_count))
        symbol_table.update(self._get_processor_symbols(core_type))
        if self.__qpi_link_speed is not None:
            symbol_table.update(self.__get_qpi_link_speed())
        return symbol_table

    def _validate_core_type(self, core_type):
        """
        :param core_type: the requested core type
        :return: core_type to use to get processor feature
        :raises ValueError: if `core_type` has no processor features and the single-feature-set
                            fallback does not apply
        """
        if core_type in self.__processor_features:
            return core_type
        if len(self.__processor_features) != 1 or core_type not in self.__unique_core_types:
            raise ValueError(f'Processor features for \'{core_type}\' not found in input data file')
        if self.__is_hybrid:
            # Older versions of EMON produced hybrid headers with a single set of processor features
            # instead of per core processor features. If this is a hybrid data file and there is only
            # one set of processor features, return those symbols for all core types, but warn the
            # user that dependent metrics may be incorrect.
            print('WARNING: Per core Processor Features were not found in the input data file. '
                  'Metrics that rely on per processor constants may be incorrect.')
        # Exactly one feature set exists: use it whatever key it is stored under
        return next(iter(self.__processor_features))

    def _get_socket_count(self, core_type):
        """
        :return: the socket count, or None when neither the system nor the processor features hold it
        """
        return self._get_system_feature(self.__symbols.NUM_SOCKETS, core_type)

    def _get_system_symbols(self, sockets_count):
        """
        :param sockets_count: the value to publish as socket count
        :return: the system level symbols (TSC value, socket count and fixed duration constants)
        """
        return {
            'system.tsc_freq': self.__ref_tsc,
            'SYSTEM_TSC_FREQ': self.__ref_tsc,
            'system.socket_count': sockets_count,
            'SOCKET_COUNT': sockets_count,
            "DURATIONTIMEINSECONDS": 1,
            "DURATIONTIMEINMILLISECONDS": 1000
        }

    def _get_processor_symbols(self, core_type: str):
        """
        :param core_type: key of the processor feature set to read
        :return: the core and thread count symbols for `core_type`
        :raises KeyError: if the feature set has no threads per core entry
        """
        features = self.__processor_features[core_type]
        cores_per_socket = self._get_processor_feature(self.__symbols.CORES_PER_SOCKET, core_type)
        threads_per_socket = self._get_processor_feature(self.__symbols.THREADS_PER_SOCKET, core_type)
        # Unlike the per socket counts, threads per core has no system feature fallback
        threads_per_core = features[self.__symbols.THREADS_PER_CORE]
        return {
            'system.sockets[0].cores.count': cores_per_socket,
            'CORES_PER_SOCKET': cores_per_socket,
            'system.sockets[0].cpus.count': threads_per_socket,
            'THREADS_PER_SOCKET': threads_per_socket,
            'system.sockets[0][0].size': threads_per_core,
            'THREADS_PER_CORE': threads_per_core,
            'HYPERTHREADING_ON': features.get('Hyper-Threading', False),
        }

    def _get_processor_feature(self, key: str, core_type: str):
        """
        :return: the processor feature `key` of `core_type`, falling back to the system feature of
                 the same name, or None when neither exists
        """
        return self.__processor_features[core_type].get(key, self.__system_features.get(key, None))

    def _get_system_feature(self, key: str, core_type: str):
        """
        :return: the system feature `key`, falling back to the processor feature of the same name
                 for `core_type`, or None when neither exists
        """
        if key in self.__system_features:
            return self.__system_features[key]
        return self.__processor_features[core_type].get(key, None)

    def _get_uncore_symbols(self):
        """
        :return: two entries for each unit of `UNCORE_UNIT_STRINGS` found in the uncore units: the
                 upper case per socket name mapped to its 'system.' prefixed name, and that prefixed
                 name mapped to the unit value
        """
        uncore_symbols = {}
        for unit in self.UNCORE_UNIT_STRINGS:
            if unit in self.__uncore_units:
                name = f'{unit}{self.PER_SOCKET_POSTFIX}'.upper()
                uncore_symbols[name] = f'system.{name}'
                uncore_symbols[f'system.{name}'] = self.__uncore_units[unit]
        return uncore_symbols

    def _get_ram_symbols(self, socket_count: Union[int, None]):
        """
        :param socket_count: the socket count, as returned by `_get_socket_count` (may be None)
        :return: the per socket value of each feature in `RAM_FEATURES` (0 is used for a feature that
                 is not present). Empty when the socket count is unknown or zero, because no per
                 socket value can be computed then
        """
        ram_symbols = {}
        if not socket_count:
            return ram_symbols
        for feature in self.RAM_FEATURES:
            ram_feature = self.__uncore_units.get(feature, 0)
            # Drop the leading 's' of the postfix: the feature name is already plural
            symbol = f'{feature}{self.PER_SOCKET_POSTFIX[1:]}'.upper()
            ram_symbols[symbol] = ram_feature / socket_count
        return ram_symbols

    def __get_qpi_link_speed(self):
        """
        :return: the QPI link speed as a single entry symbol dictionary
        """
        return {'UPI_speed_Tps': self.__qpi_link_speed}
