In [1]:

import os 
import pandas as pd
import numpy as np

In [6]:

def get_file_stem(path):
    """Return the last component of ``path`` with its final extension removed.

    Only the last extension is stripped, so ``"a/b/clip.tar.gz"`` yields
    ``"clip.tar"``.
    """
    _, filename = os.path.split(path)
    stem, _extension = os.path.splitext(filename)
    return stem

def read_metadata(df_path):
    """Load a header-less, space-separated metadata file into a DataFrame.

    The columns are labelled ``video_path``, ``frames`` and ``label`` in
    that order, so the file must contain exactly three fields per row.
    """
    column_names = ["video_path", "frames", "label"]
    metadata = pd.read_csv(df_path, sep=" ", header=None)
    metadata.columns = column_names
    return metadata

def df_to_txt(df, dir_path):
    """Append ``df`` to the file at ``dir_path`` as space-separated rows.

    Neither a header line nor the index is written. The file is opened in
    append mode, so calling this twice with the same target duplicates the
    rows rather than overwriting them.
    """
    csv_options = dict(header=None, index=None, sep=' ', mode='a')
    df.to_csv(dir_path, **csv_options)

In [2]:

# Path of the training metadata file that is passed to read_metadata().
# NOTE(review): absolute, machine-specific path; the notebook will not run
# elsewhere unless this is pointed at a local copy of the file.
file = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Datasets\HAR_Video\Base_Dataset\Train_encodded.txt"

In [7]:

# Inverse-frequency class weights: weights[label] == 1 / relative frequency
# of that label in the metadata file.
df = read_metadata(file)
freq = df.label.value_counts(normalize=True)

# Size the array by the largest label rather than by the number of distinct
# labels seen, so a label that is absent from this file cannot push another
# label out of bounds. Start from zeros because np.empty() would leave
# arbitrary memory contents in the slot of any absent label.
n_classes = int(freq.index.max()) + 1 if len(freq) else 0
weights = np.zeros(n_classes)

# Labels are used directly as positions, so they must be non-negative ints.
weights[freq.index.to_numpy()] = 1.0 / freq.to_numpy()

In [9]:

In [13]:

# Class labels in the order value_counts() returned them (most frequent first).
freq.index

Out[13]:

Int64Index([21, 2, 6, 11, 20, 3, 0, 7, 9, 1, 19, 5, 8, 16, 10, 15, 12, 17, 13,
            4, 18, 14],
           dtype='int64')

In [14]:

In [18]:

# Relative frequency of each label, aligned with freq.index.
freq.values

Out[18]:

array([5.73450568e-01, 1.04010294e-01, 5.23268282e-02, 3.51704911e-02,
       3.51704911e-02, 3.19536779e-02, 3.06669526e-02, 2.42333262e-02,
       2.23032383e-02, 1.88719708e-02, 1.35106155e-02, 1.30817071e-02,
       1.13660733e-02, 8.57816856e-03, 6.21917221e-03, 5.36135535e-03,
       4.07463007e-03, 2.78790478e-03, 2.57345057e-03, 2.14454214e-03,
       1.71563371e-03, 4.28908428e-04])

In [20]:

In [21]:

# Inverse-frequency weights; position i holds the weight for label i.
weights

Out[21]:

array([3.26083916e+01, 5.29886364e+01, 9.61443299e+00, 3.12953020e+01,
       4.66300000e+02, 7.64426230e+01, 1.91106557e+01, 4.12654867e+01,
       8.79811321e+01, 4.48365385e+01, 1.60793103e+02, 2.84329268e+01,
       2.45421053e+02, 3.88583333e+02, 2.33150000e+03, 1.86520000e+02,
       1.16575000e+02, 3.58692308e+02, 5.82875000e+02, 7.40158730e+01,
       2.84329268e+01, 1.74382947e+00])

In [24]:

# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
import joblib
# NOTE(review): encoder_file is assigned but not used in the visible cells;
# the copy under Downloads (encoder_file_2) is the one actually loaded.
# Both are absolute, machine-specific paths.
encoder_file = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Datasets\HAR_dataset_v1\encoder_train.pkl"
encoder_file_2  =r"C:\Users\jeuux\Downloads\encoder_train (1).pkl"
# NOTE(review): joblib.load unpickles the file and can execute arbitrary
# code while doing so; only load pickles that come from a trusted source.
encoder = joblib.load(encoder_file_2)

In [25]:

# Number of classes stored on the loaded encoder.
# NOTE(review): presumably a scikit-learn LabelEncoder; confirm, and check
# that this count matches len(weights).
len(encoder.classes_)

Out[25]: