import os
import pandas as pd
import numpy as np
def get_file_stem(path):
    """Return the file name of *path* without directory or last extension.

    Only the final suffix is stripped, so ``"clip.tar.gz"`` yields
    ``"clip.tar"``. Path separators are interpreted by the running
    platform's ``os.path`` implementation.

    Args:
        path: File path to take the stem from.

    Returns:
        The last path component with its final extension removed.
    """
    base = os.path.basename(path)
    return os.path.splitext(base)[0]
def read_metadata(df_path):
    """Load a space-separated, header-less annotation file into a DataFrame.

    Args:
        df_path: Location of the annotation file, passed unchanged to
            ``pandas.read_csv``.

    Returns:
        DataFrame whose columns are named ``video_path``, ``frames`` and
        ``label``, in that order.

    Raises:
        ValueError: If the parsed file does not have exactly three columns
            (raised when the column names are assigned).
    """
    df = pd.read_csv(df_path, sep=" ", header=None)
    df.columns = ["video_path", "frames", "label"]
    return df
def df_to_txt(df, dir_path):
    """Append *df* to a text file as space-separated rows.

    Neither a header row nor the index column is written. The file is opened
    in append mode, so an existing file is never truncated and repeated calls
    keep adding rows to it.

    Args:
        df: DataFrame to write.
        dir_path: Despite its name, the path of the output *file*; it is
            passed unchanged to ``DataFrame.to_csv``.
    """
    # ``to_csv`` documents ``header`` and ``index`` as booleans; pass False
    # explicitly instead of relying on None being treated as falsy.
    df.to_csv(dir_path, header=False, index=False, sep=" ", mode="a")
# Annotation file of the training split (machine-specific absolute path).
file = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Datasets\HAR_Video\Base_Dataset\Train_encodded.txt"
df = read_metadata(file)

# Relative frequency of every label value found in the file.
freq = df.label.value_counts(normalize=True)

# Inverse-frequency class weights, laid out so that weights[label] is the
# weight of that label. The label values are used directly as array
# positions, so they are assumed to be exactly the integers 0..len(freq)-1
# -- TODO confirm against the label encoder; a larger label raises IndexError.
weights = np.empty(len(freq))
weights[freq.index.values] = 1.0 / freq.values
# Notebook-style inspection of the values computed above. The text following
# each expression appears to be pasted cell output; it is kept verbatim as
# comments because, left as code, it calls the undefined names ``Int64Index``
# and ``array`` and raises NameError.
freq.index
# Int64Index([21, 2, 6, 11, 20, 3, 0, 7, 9, 1, 19, 5, 8, 16, 10, 15, 12, 17, 13,
#             4, 18, 14],
#            dtype='int64')
freq.values
# array([5.73450568e-01, 1.04010294e-01, 5.23268282e-02, 3.51704911e-02,
#        3.51704911e-02, 3.19536779e-02, 3.06669526e-02, 2.42333262e-02,
#        2.23032383e-02, 1.88719708e-02, 1.35106155e-02, 1.30817071e-02,
#        1.13660733e-02, 8.57816856e-03, 6.21917221e-03, 5.36135535e-03,
#        4.07463007e-03, 2.78790478e-03, 2.57345057e-03, 2.14454214e-03,
#        1.71563371e-03, 4.28908428e-04])
weights
# array([3.26083916e+01, 5.29886364e+01, 9.61443299e+00, 3.12953020e+01,
#        4.66300000e+02, 7.64426230e+01, 1.91106557e+01, 4.12654867e+01,
#        8.79811321e+01, 4.48365385e+01, 1.60793103e+02, 2.84329268e+01,
#        2.45421053e+02, 3.88583333e+02, 2.33150000e+03, 1.86520000e+02,
#        1.16575000e+02, 3.58692308e+02, 5.82875000e+02, 7.40158730e+01,
#        2.84329268e+01, 1.74382947e+00])
import joblib
# Two machine-specific locations of the pickled encoder (presumably a fitted
# label encoder -- confirm). Only the second one is loaded below.
encoder_file = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Datasets\HAR_dataset_v1\encoder_train.pkl"
encoder_file_2 = r"C:\Users\jeuux\Downloads\encoder_train (1).pkl"

# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code. This path points into a Downloads folder; only load the
# file if its origin is trusted.
encoder = joblib.load(encoder_file_2)

# Number of classes known to the encoder.
len(encoder.classes_)
# 22   (apparently pasted cell output; kept as a comment)