diff --git a/extraction/attributeclassification/actionclassification.py b/extraction/attributeclassification/actionclassification.py
index a07dbde659887eecb0afb06bc88f4ad2aa8d5697..3add9f9caaa5b91dd8a23963ebc7c2479354d690 100644
--- a/extraction/attributeclassification/actionclassification.py
+++ b/extraction/attributeclassification/actionclassification.py
@@ -9,10 +9,21 @@ from nltk.corpus import words
 
 class ActionClassifier:
 
-    def __init__(self, config, aug_log, embeddings):
+    def __init__(self, config, aug_log=None, embeddings=None, actions=None):
+        """Create a classifier from an augmented log or from an explicit list of actions.
+
+        :raises ValueError: if neither ``aug_log`` nor ``actions`` is given
+        """
         self.config = config
         self.aug_log = aug_log
-        self.actions = list([act for act in self.aug_log.get_all_unique_values_for_role(ConceptType.ACTION_NAME.value) if any(tok in words.words() for tok in act.split(" "))])
+        if aug_log is not None:
+            # Build the vocabulary once; inlined, words.words() is re-evaluated for every token.
+            vocabulary = set(words.words())
+            self.actions = [act for act in self.aug_log.get_all_unique_values_for_role(ConceptType.ACTION_NAME.value) if any(tok in vocabulary for tok in act.split(" "))]
+        elif actions is not None:
+            self.actions = actions
+        else:
+            raise ValueError("Either aug_log or actions must be provided")
         self.embeddings = embeddings
         self.lemmatizer = WordNetLemmatizer()
         with open(self.config.resource_dir / 'mitphb.json') as json_file:
diff --git a/extraction/attributeclassification/simactionclassifier.py b/extraction/attributeclassification/simactionclassifier.py
new file mode 100644
index 0000000000000000000000000000000000000000..160eb349197f6c9f431802bcc2815d7c1d8ee82c
--- /dev/null
+++ b/extraction/attributeclassification/simactionclassifier.py
@@ -0,0 +1,110 @@
+import operator
+
+from nltk import WordNetLemmatizer
+
+
+class SlimActionClassifier:
+    """Map action phrases to top-level actions of an action taxonomy.
+
+    The taxonomy is a nested mapping ``{action: [child_taxonomy, ...]}``.
+    A phrase is compared with every taxonomy action via embedding
+    similarity and resolved to a top-level ancestor of the best match.
+    """
+
+    def __init__(self, embeddings, action_taxonomy):
+        """Index the taxonomy for later classification.
+
+        :param embeddings: object whose ``embeddings`` attribute provides
+            ``similarity(a, b)``; a ``KeyError`` from it is treated as
+            "term not in vocabulary"
+        :param action_taxonomy: nested mapping of actions to child taxonomies
+        """
+        self.embeddings = embeddings
+        self.lemmatizer = WordNetLemmatizer()
+        # all unique actions
+        self.unique_actions_taxonomy = set()
+        # a mapping from all unique actions to their top most ancestor(s)
+        self.child_to_upper_level = dict()
+        # all upper level actions
+        self.upper_acts = set()
+        self.unique_actions_from_taxonomy(action_taxonomy, self.unique_actions_taxonomy,
+                                          self.child_to_upper_level, self.upper_acts)
+
+    def classify_actions(self, actions):
+        """Return ``{action: action type}`` for every action in ``actions``."""
+        return {act: self.get_action_type_for_action(act) for act in actions}
+
+    def classify_action(self, action):
+        """Return the action type of a single action phrase."""
+        return self.get_action_type_for_action(action)
+
+    def unique_actions_from_taxonomy(self, action_taxonomy, unique_actions, child_to_upper_level, upper_acts, upper_level=None):
+        """Recursively collect the taxonomy's actions into the given containers.
+
+        :param action_taxonomy: mapping of an action to a list of child taxonomies
+        :param unique_actions: set receiving every action
+        :param child_to_upper_level: dict receiving action -> set of top-level ancestors
+        :param upper_acts: set receiving the top-level actions
+        :param upper_level: top-level ancestor of this subtree; None at the root
+        """
+        for act, children in action_taxonomy.items():
+            unique_actions.add(act)
+            if upper_level is None:
+                # a top-level action is its own (only) ancestor
+                child_to_upper_level[act] = {act}
+                upper_acts.add(act)
+                ul = act
+            else:
+                child_to_upper_level.setdefault(act, set()).add(upper_level)
+                ul = upper_level
+            for child in children:
+                self.unique_actions_from_taxonomy(child, unique_actions, child_to_upper_level, upper_acts, upper_level=ul)
+
+    def _similarity(self, term, tax_action):
+        """Return the embedding similarity, or None if a term is out of vocabulary."""
+        try:
+            return self.embeddings.embeddings.similarity(term, tax_action)
+        except KeyError:
+            return None
+
+    def get_most_similar(self, action, taxonomy_actions, child_to_upper_level, upper_acts):
+        """Return the top-level taxonomy action closest to ``action``.
+
+        The most similar taxonomy action is mapped to its most similar
+        top-level ancestor. If that ancestor's similarity is unknown or not
+        positive, the most similar top-level action overall is returned.
+
+        :return: a top-level action, or the string "None" if ``action`` has
+            fewer than three characters or nothing could be compared
+        """
+        if len(action) < 3:
+            return "None"
+        # Fallback term: lemma of the last token. It is computed once and
+        # ``action`` is never overwritten, so the outcome does not depend on
+        # the iteration order of ``taxonomy_actions``.
+        head = self.lemmatizer.lemmatize(action.split(" ")[-1])
+        sims = {}
+        for tax_action in taxonomy_actions:
+            sim = self._similarity(action, tax_action)
+            if sim is None and head != action:
+                sim = self._similarity(head, tax_action)
+            if sim is not None:
+                sims[tax_action] = sim
+        if not sims:
+            return "None"
+        by_similarity = operator.itemgetter(1)
+        upper_level_sims = {act: sim for act, sim in sims.items() if act in upper_acts}
+        # best top-level action overall; "None" when none of them was comparable
+        best_upper = max(upper_level_sims.items(), key=by_similarity)[0] if upper_level_sims else "None"
+        best = max(sims.items(), key=by_similarity)[0]
+        # only ancestors with a known similarity can be ranked
+        ancestors = [anc for anc in child_to_upper_level.get(best, ()) if anc in sims]
+        if ancestors:
+            top = max(ancestors, key=sims.get)
+            if sims[top] > 0:
+                return top
+        return best_upper
+
+    def get_action_type_for_action(self, action):
+        """Classify ``action`` against the taxonomy indexed in ``__init__``."""
+        return self.get_most_similar(action, self.unique_actions_taxonomy, self.child_to_upper_level, self.upper_acts)
diff --git a/extraction/data/word_embeddings.py b/extraction/data/word_embeddings.py
index a2a509befda302962568442589d2f81d050d8391..592dd01f7b64c55514dc911594a5bf5c50a9a74d 100644
--- a/extraction/data/word_embeddings.py
+++ b/extraction/data/word_embeddings.py
@@ -3,6 +3,5 @@ import gensim.downloader as api
 
 class WordEmbeddings:
 
-    def __init__(self, config):
-        self.config = config
-        self.embeddings = api.load(self.config.word_embeddings_file)
+    def __init__(self, word_embeddings_file):
+        self.embeddings = api.load(word_embeddings_file)
diff --git a/extraction/extract.py b/extraction/extract.py
index 5701770b3b05e0365f444731af54d143ff7095da..dc64fcd6a86aa23d076441d968abdf886b63ddd0 100644
--- a/extraction/extract.py
+++ b/extraction/extract.py
@@ -1,3 +1,4 @@
+import json
 import logging
 
 from pm4py.objects.log.obj import EventLog
@@ -8,6 +9,7 @@ import time
 
 from extraction.attributeclassification.actionclassification import ActionClassifier
 from extraction.attributeclassification.resourceclassifier import ResourceClassifier
+from extraction.attributeclassification.simactionclassifier import SlimActionClassifier
 from extraction.data.word_embeddings import WordEmbeddings
 from extraction.attributeclassification.attribute_classification import AttributeClassifier
 import extraction.preprocessing.preprocessor as pp
@@ -28,6 +30,7 @@ _model_folder = "model"
 _glove_model = "glove.6B.100d.txt"
 _spacy_model = "en_core_web_lg"
 _bert_model = "arebmann/model"
+_we_file = "glove-wiki-gigaword-50"
 
 
 def get_instance():
@@ -55,13 +58,16 @@ def _load_models():
         return bt
 
     bert = _load_bert()
-    return nlp_util, bert, kb
+    word_embeddings = WordEmbeddings(_we_file)  # forwarded to api.load(): must be a model name, not the model directory
+    with open(load_model_from_package(_module_name) / _model_folder / 'mitphb.json') as json_file:
+        action_taxonomy = json.load(json_file)
+    return nlp_util, bert, kb, word_embeddings, action_taxonomy
 
 
 class Extraction:
 
     def __init__(self):
-        self._nlp_util, self._bert, self._kb = _load_models()
+        self._nlp_util, self._bert, self._kb, self._embeddings, self.action_taxonomy = _load_models()
 
     def extract_roles_from_label(self, label: str) -> dict:
         """
@@ -69,10 +75,15 @@ class Extraction:
         @param label: the textual value the roles should be extracted from
         @return: a dictionary of the form {'role type 1': ['role instance 1', 'role instance 2']}
         """
-
+        action_classifier = SlimActionClassifier(self._embeddings, self.action_taxonomy)
         cleaned = preprocessor.preprocess_label(label)
         tagged = self._bert.predict_single_label(cleaned)
-        return augmented_log.get_tagged(tagged[0], tagged[1], self._nlp_util)
+        roles = augmented_log.get_tagged(tagged[0], tagged[1], self._nlp_util)
+        if "action:name" in roles:  # test the key itself; a list operand is unhashable and raises TypeError on a dict
+            action = action_classifier.classify_actions(roles["action:name"])
+            if action is not None:
+                roles["action:type"] = action
+        return roles
 
     def extract_roles_from_list_of_labels(self, labels: list) -> dict:
         """
@@ -80,9 +91,7 @@ class Extraction:
         @param labels: the textual values the roles should be extracted from
         @return: a dictionary of the form {'initial label': {'role type 1': ['role instance 1', 'role instance 2']}}
         """
-        cleaned = [preprocessor.preprocess_label(label) for label in labels]
-        tagged = [self._bert.predict_single_label(single) for single in cleaned]
-        return {labels[i]: augmented_log.get_tagged(single[0], single[1], self._nlp_util) for i, single in enumerate(tagged)}
+        return {label: self.extract_roles_from_label(label) for label in labels}
 
     def _add_resource_and_action_types(self, aug_log, res_to_type, act_to_type):
         cls_to_res = {}
@@ -140,7 +149,7 @@ class Extraction:
         toc = time.perf_counter()
         print(f"Preprocessed the current log in {toc - tic:0.4f} seconds")
         print("load word embeddings " + config.word_embeddings_file)
-        word_embeddings = WordEmbeddings(config=config)
+        word_embeddings = WordEmbeddings(config.word_embeddings_file)
         print("BERT-based semantic tagging")
         print('semantic tagging text attributes')
         tic = time.perf_counter()