#!/usr/bin/env python
# coding: utf-8
"""Notebook export: build per-window video clips and recording metadata.

The cells below (in notebook order):

1. concatenate two raw video parts into a single file,
2. slice an annotated dataset into windows and label each window with its
   most frequent ``picture`` value,
3. randomly undersample the ``"Null"`` class,
4. cut the participant video into one clip per window,
5. map recording files to participant names and media file names and dump
   that mapping to JSON.

The trailing cells are exploratory scratch work kept from the notebook; several
of them reference names that are not defined anywhere in this file and are
flagged with ``NOTE(review)`` comments.
"""

# In[1]:


import pandas as pd
import os
import numpy as np


# In[ ]:


from moviepy.editor import VideoFileClip, concatenate_videoclips


# In[ ]:


base_dir = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\Videos_raw"
clip1 = VideoFileClip(os.path.join(base_dir, "1005b_part1.mp4"))
clip2 = VideoFileClip(os.path.join(base_dir, "1005b_part2.mp4"))
final_clip = concatenate_videoclips([clip1, clip2])
final_clip.write_videofile("1005b.mp4")
# Release the underlying readers once the merged file has been written.
clip1.close()
clip2.close()


# # Dataset

# In[ ]:


data_file = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Participants\1105e\FullDataset\Final_data_1105e.pkl"
# NOTE: read_pickle executes arbitrary code from the file; only load trusted pickles.
data = pd.read_pickle(data_file)


# In[ ]:


data


# In[ ]:


def windows(data, size, factor=2):
    """Yield ``(start, end)`` integer bounds of sliding windows over *data*.

    Consecutive windows start ``size / factor`` positions apart, so
    ``factor=1`` yields back-to-back windows and ``factor=2`` yields windows
    overlapping by half. ``end`` is always ``start + size`` and is NOT clipped
    to ``len(data)``; callers must handle an ``end`` past the last element.

    Raises:
        ValueError: if ``size / factor`` is not positive (the loop would
            otherwise never advance).
    """
    step = size / factor
    if step <= 0:
        raise ValueError("size / factor must be positive")
    start = 0
    while start + step < len(data):
        yield int(start), int(start + size)
        start += step


# In[ ]:


def to_seconds(time, factor=1000):
    """Return ``time / factor`` rounded to two decimals.

    With the default ``factor`` this converts thousandths of a unit to units
    (presumably milliseconds to seconds - confirm against the dataset).
    """
    return round(time / factor, 2)


# In[ ]:


def get_pictures_dataset(data, user, window_size, factor=1):
    """Build one row per window with its majority ``picture`` and time span.

    Args:
        data: DataFrame with ``"picture"`` and ``"Recording timestamp"``
            columns. Rows are addressed with ``.loc`` using the integer
            bounds from :func:`windows`, so this assumes a default 0..n-1
            integer index - TODO confirm.
        user: Prefix used to build each row id as ``f"{user}_{idx}"``.
        window_size: Window length passed to :func:`windows`.
        factor: Overlap factor passed to :func:`windows`.

    Returns:
        DataFrame indexed by ``id`` with columns ``target``, ``start`` and
        ``end`` (timestamps converted with :func:`to_seconds`).
    """
    df_pictures = {"id": [], "target": [], "start": [], "end": []}
    last_pos = len(data) - 1
    for idx, (start, end) in enumerate(windows(data.index, window_size, factor)):
        # With overlapping windows the final window can run past the data;
        # clamp so the scalar .loc lookup below does not raise KeyError.
        end = min(end, last_pos)
        df_pictures["id"].append(f"{user}_{idx}")
        # .loc slicing is label based and includes the ``end`` row.
        df_pictures["target"].append(data.loc[start:end, "picture"].mode()[0])
        df_pictures["start"].append(to_seconds(data.loc[start, "Recording timestamp"]))
        df_pictures["end"].append(to_seconds(data.loc[end, "Recording timestamp"]))
    return pd.DataFrame.from_dict(df_pictures).set_index("id")


# In[ ]:


user = "1105e"
test = get_pictures_dataset(data, user, 500)


# In[ ]:


def filter_nulls(df):
    """Randomly undersample ``"Null"`` rows down to the largest other class.

    If the number of rows whose ``target`` is ``"Null"`` exceeds the count of
    the most frequent non-``"Null"`` target, a random subset of the ``"Null"``
    rows of that size is kept (sampling without replacement via
    ``np.random.choice``); all other rows are kept. Otherwise ``df`` is
    returned unchanged. If there are no non-``"Null"`` rows, ``df`` is also
    returned unchanged.

    Args:
        df: DataFrame with a ``"target"`` column.

    Returns:
        The filtered DataFrame (rows ordered by the union of the kept index
        labels) or ``df`` itself when no undersampling is needed.
    """
    # Size of the majority class among the non-null targets. Computed from the
    # argument itself, not from a module-level DataFrame.
    counts = df.loc[df["target"] != "Null", "target"].value_counts()
    if counts.empty:
        return df
    samples_max = int(counts.max())

    # Index labels of the null rows.
    null_idx = df[df["target"] == "Null"].index
    samples_null = len(null_idx)
    if samples_null > samples_max:
        # Perform random undersampling of the null rows.
        rest_idx = df[df["target"] != "Null"].index
        selected_idx = np.random.choice(samples_null, size=samples_max, replace=False)
        null_idx = null_idx[selected_idx]
        # Index.union replaces the removed ``|`` set-union operator on Index.
        df = df.loc[null_idx.union(rest_idx)]
    return df


# In[ ]:


df = filter_nulls(test)


# In[ ]:


df


# # Video

# In[ ]:


from moviepy.editor import *


# In[ ]:


path_video = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\Videos_raw\1105e.mp4"
video = VideoFileClip(path_video).resize((224, 224))


# In[ ]:


df = df.reset_index()


# In[ ]:


video_dataset = {
    "video_path": [],
    "label": [],
    "frames": [],
}
root_path = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Participants"
video_folder = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Participants\1105e\Clips"
participant = "1105e"
os.makedirs(video_folder, exist_ok=True)


# In[ ]:


# NOTE(review): ``tqdm`` is not imported anywhere in this file; it must already
# be in scope (e.g. ``from tqdm import tqdm``) for this cell to run.
for idx in tqdm(df.index):
    # Cut the clip for this window.
    clip = video.subclip(df.loc[idx, "start"], df.loc[idx, "end"])
    n_frames = clip.duration * clip.fps
    label = df.loc[idx, "target"]
    # Write the clip to <root_path>/<participant>/Clips/<id>.mp4.
    clip_id = df.loc[idx, "id"]
    video_rel_path = os.path.join(participant, "Clips", f"{clip_id}.mp4")
    clip_file = os.path.join(root_path, video_rel_path)
    clip.write_videofile(clip_file, audio=False, logger=None)
    # Record the clip in the dataset description (path relative to root_path).
    video_dataset["video_path"].append(video_rel_path)
    video_dataset["label"].append(label)
    video_dataset["frames"].append(n_frames)


# In[ ]:


video_dataset


# In[9]:


media_path = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\ProjectData\Media"
participants_path = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\ProjectData\Participants"
recordings_path = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\ProjectData\Recordings"


# In[17]:


import xml.etree.ElementTree as ET


def _read_key_and_field(xml_file, field_tag):
    """Return the text of the root's ``Key`` child and of its *field_tag* child."""
    root = ET.parse(xml_file).getroot()
    # ``find`` returns None for a missing child, in which case ``.text``
    # raises AttributeError.
    return root.find("Key").text, root.find(field_tag).text


def get_media_keys(xml_file):
    """Return ``(key, filename)`` read from the ``Key`` and ``TargetFileName``
    children of the XML root of *xml_file*."""
    return _read_key_and_field(xml_file, "TargetFileName")


def get_participant_keys(xml_file):
    """Return ``(key, participant)`` read from the ``Key`` and ``Name``
    children of the XML root of *xml_file*."""
    return _read_key_and_field(xml_file, "Name")


def get_recording_keys(xml_file):
    """Return ``(participant_id, media_id)`` from a recording XML file.

    ``participant_id`` is the text of the first element whose tag contains
    ``"ParticipantId"``; ``media_id`` is the text of the first element whose
    tag contains ``"guid"``. Raises ``StopIteration`` if either is absent.
    """
    tree = ET.parse(xml_file)
    participant_id = next(elem.text for elem in tree.iter() if "ParticipantId" in elem.tag)
    media_id = next(elem.text for elem in tree.iter() if "guid" in elem.tag)
    return participant_id, media_id


# In[12]:


# NOTE: this rebinds ``test`` (previously the windowed DataFrame) to a path.
test = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\ProjectData\Recordings\BqQksdKnyUK4EYy9bga5Yg.rec"
tree = ET.parse(test)


# In[16]:


for el in tree.iter():
    if "guid" in el.tag:
        print(el.text)


# In[18]:


# Define key mappings: one dict per recording, keyed by the participant id and
# the media id, with values filled in by the cells below.
recording_files = [os.path.join(recordings_path, file) for file in os.listdir(recordings_path) if "rec" in file]
recordings = {}
for idx, file in enumerate(recording_files):
    participant_id, media_id = get_recording_keys(file)
    recordings[idx] = {participant_id: "", media_id: ""}


# In[26]:


x = recordings[1].keys()


# In[29]:


x


# In[30]:


'2872c273-20e0-4ed3-a3bf-1908a208a760' in x


# In[34]:


for file in os.listdir(participants_path):
    file = os.path.join(participants_path, file)
    # Get the participant key and fill in the name wherever that key appears.
    participant_key, participant_name = get_participant_keys(file)
    for mapping in recordings.values():
        if participant_key in mapping:
            mapping[participant_key] = participant_name


# In[37]:


media_files = [os.path.join(media_path, file) for file in os.listdir(media_path) if "xml" in file]
for file in media_files:
    # Get the media key and fill in the file name wherever that key appears.
    media_key, recording_filename = get_media_keys(file)
    for mapping in recordings.values():
        if media_key in mapping:
            mapping[media_key] = recording_filename


# In[40]:


import json

recordings_metadata_file = r"C:\Users\jeuux\Desktop\Carrera\MoAI\TFM\AnnotatedData\FinalDatasets\Datasets\Video_Dataset_meta\recordings_meta.json"
with open(recordings_metadata_file, "w") as f:
    json.dump(recordings, f)


# In[3]:


# NOTE(review): the cells from here on are exploratory scratch work. They call
# ``get_hash_root``, ``get_key_name`` and ``get_participant_id`` and read
# ``xml_dir``, ``elem`` and ``test_key``, none of which are defined in this
# file, so they fail with NameError when run top to bottom.

# participant_keys = [get_hash_root(os.path.join(xml_dir,xml_file)) for xml_file in os.listdir(participants_path)]
media_keys = [get_hash_root(os.path.join(media_path, xml_file)) for xml_file in os.listdir(media_path) if "xml" in xml_file]
participant_keys = {}
for xml_file in os.listdir(participants_path):
    if "rec" in xml_file:
        key, participant = get_key_name(os.path.join(participants_path, xml_file))
        participant_keys[key] = participant
participant_ids = [get_participant_id(os.path.join(recordings_path, xml_file)) for xml_file in os.listdir(recordings_path) if "rec" in xml_file]


# In[ ]:


# In[6]:


ext_rec_id = "b27628c4-2e7f-4e8e-8dd3-d933d165f04e"
key = "b124a406-a7d2-42c9-b811-8cbd6e06b962"
media_dp1 = "dcd22fbf-4ea2-49cf-90da-463d9e49e9ae"
for keys in media_keys:
    if media_dp1 == keys:
        print("Hi")


# In[ ]:


# NOTE(review): ``media_keys`` is built as a list above, which has no
# ``.values()``; this only works if it was a dict in an earlier kernel state.
media_keys.values()


# In[ ]:


xml_file = os.path.join(participants_path, xml_file)
tree = ET.parse(xml_file)
root = tree.getroot()
key = root.find("Key").text
participant = root.find("Name").text


# In[ ]:


key


# In[ ]:


for e in elem.iter():
    print(e)


# In[ ]:


tree.getroot()


# In[ ]:


# NOTE(review): the loop variable ``keys`` is never used; the comparison is
# against the module-level ``key``. Confirm which one was intended.
for keys in participant_ids:
    try:
        next(media_key for media_key in media_keys if media_key == key)
        print("Match!")
    except StopIteration:
        # ``next`` found no matching media key.
        print("there is not any match")


# In[ ]:


len(participant_keys) == len(media_keys)


# In[ ]:


print(test_key in participant_keys)


# In[ ]:


def find_recording(recordings_path, hash_root):
    """Return True if an ``mp4`` file in *recordings_path* has *hash_root* as
    the part of its name before the first ``"=="``; otherwise return None.

    The match is printed before returning.
    """
    for file in os.listdir(recordings_path):
        if "mp4" in file:
            root = file.split("==")[0]
            if root == hash_root:
                print(root, hash_root)
                return True


# In[ ]:


for xml_file in os.listdir(xml_dir):
    hash_root = get_hash_root(os.path.join(xml_dir, xml_file))
    find_recording(recordings_path, hash_root)


# In[ ]:


# NOTE(review): ``get_file_root`` is only defined in the next cell, so this
# cell fails when the file is executed top to bottom.
test_hash = "4KpMREZBURFy2YS6VpeYYw"
for file in os.listdir(xml_dir):
    print(get_file_root(file) in test_hash)


# In[ ]:


def get_file_root(path):
    """Return the final component of *path* without its extension."""
    base = os.path.basename(path)
    return os.path.splitext(base)[0]