隐马尔可夫模型python实现。

时间:2024-02-23 21:55:16
隐马尔可夫模型python实现。
  1. 隐马尔可夫代码实现比数学计算要简单,只需要三个矩阵:转移概率矩阵、发射概率矩阵、初始概率矩阵。
  2. 维特比算法是用来推测最后结果的。
  3. HMM实现可以作为CRF实现的基础。
class HMM:
    def __init__(self, hidden_states):
        self.hidden_states = hidden_states
        self.start = {k: 0.0 for k in hidden_states}
        self.trans = {k: {s: 0.0 for s in hidden_states} for k in hidden_states}
        self.emi = {k: {} for k in hidden_states}

    def train_single(self, hidden_state_list, state_list):
        '''
        训练一条样本
        :param hidden_state_list: 例如['B', 'M', 'E', 'S']
        :param state_list: ['娃','哈','哈','啊']
        '''
        for idx, hidden_state in enumerate(hidden_state_list):
            state = state_list[idx]
            if idx == 0:
                self.start[hidden_state] += 1
            else:
                front_hidden_state = hidden_state_list[idx-1]
                self.trans[front_hidden_state][hidden_state] += 1
            self.emi[hidden_state][state] = self.emi[hidden_state].get(state, 0) + 1

    def train(self, train_data):
        for hsl, sl in train_data:
            self.train_single(hsl, sl)
        # 进行归一化,并进行log操作,将之后的乘法变成加法。
        self.start = {k: np.log(v / np.sum(list(self.start.values()))) if v != 0 else -3e+100 for k, v in self.start.items()}
        self.trans = {k: {vk:np.log(vv / np.sum(list(v.values()))) if vv != 0 else -3e+100 for vk, vv in v.items()} for k, v in self.trans.items()}
        self.emi = {k:{vk:np.log(vv / np.sum(list(v.values()))) if vv != 0 else -3e+100 for vk, vv in v.items()} for k, v in self.emi.items()}

    def viterbi(self, state_list):
        for idx, state in enumerate(state_list):
            if idx == 0:
                current_path = {hidden_state:[hidden_state] for hidden_state in self.hidden_states}
                current_prob = {hidden_state:self.start[hidden_state] + self.emi[hidden_state].get(state, -3e+100) for hidden_state in self.hidden_states}
            else:
                temp_current_prob = {}
                temp_current_path = {}
                for cur_hs in self.hidden_states:
                    prob, hs = max([(current_prob[fr_hs] + self.trans[fr_hs].get(cur_hs, -3e+100) + self.emi[cur_hs].get(state, -3e+100), fr_hs) for fr_hs in self.hidden_states])
                    temp_current_path[cur_hs] = current_path[hs] + [cur_hs]
                    temp_current_prob[cur_hs] = prob
                current_path = temp_current_path
                current_prob = temp_current_prob
        max_prob, max_state = max([(v, k) for k, v in current_prob.items()])
        return current_path[max_state]