2020 ICPC 沈阳现场赛 B Whispers of the Old Gods 题解

题面

给定一个长度为 $5000$ 的正则表达式和一个长度为 $10000$ 的文本串,求文本串最少修改多少次能够被正则表达式识别。

题解

对正则表达式构造 NFA,令 $f(l,i)$ 表达式使用完前 $l$ 个字符后,到达了 NFA 的状态 $i$ 时的最小修改次数,转移方程的形式为求一个 $0/1$ 最短路。

XLex 正则表达式

NFA 的构造,注意构造过程中不能随意删除中间的 $\epsilon$ 边:

12

3

4

5

如果随意删除中间的 $\epsilon$ 边,可能会有如下的反例:

反例1

反例2

上图为 1+2+,而下图为 (1+2+)+

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#include <iostream>
#include <cassert>
#include <functional>
#include <algorithm>
#include <utility>
#include <vector>
#include <string>
#include <set>
#include <deque>
using namespace std;
using PII = pair<int,int>;
const int inf = 1 << 30;
const int maxn = 200000 + 5;

bool isDigit(char ch) {
return '0' <= ch && ch <= '9';
}

using State = vector<PII>;

const int Epsilon = 0;

class Nfa : public vector<State> {
private:
const string& pat;
int cur = 0;

int createState() {
int root = size();
push_back(State());
return root;
}

PII parseAtom() {
if (isDigit(pat[cur])) {
int begin = createState();
int end = createState();
at(begin).emplace_back((int)pat[cur], end);
cur++;
return { begin, end };
} else if (pat[cur] == '[') {
cur++;
int begin = createState();
int end = createState();
set<int> chs;
while (cur < pat.length() && isDigit(pat[cur])) {
chs.insert((int)pat[cur++]);
}
assert(pat[cur++] == ']');
for (int ch: chs) at(begin).emplace_back(ch, end);
return { begin, end };
} else {
assert(pat[cur++] == '(');
auto node = parseAlt();
assert(pat[cur++] == ')');
return node;
}
}

PII parseReg() {
int begin = createState();
int end = begin;
auto peekPlus = [&]() {
while (cur < pat.length() && pat[cur] == '+') cur++;
};
while (cur < pat.length() && (isDigit(pat[cur]) || pat[cur] == '[' || pat[cur] == '(')) {
auto next = parseAtom();
if (cur < pat.length() && pat[cur] == '+') {
peekPlus();
at(end).emplace_back(Epsilon, next.first);
at(next.second).emplace_back(Epsilon, next.first);
end = next.second;
} else {
at(end).emplace_back(Epsilon, next.first);
end = next.second;
}
}
return { begin, end };
}

PII parseAlt() {
int begin = createState();
auto next = parseReg();
at(begin).emplace_back(Epsilon, next.first);
vector<int> subEnd { next.second };
while (cur < pat.length() && pat[cur] == '|') {
cur++;
auto next = parseReg();
at(begin).emplace_back(Epsilon, next.first);
subEnd.push_back(next.second);
}
int end = createState();
for (auto& state: subEnd) {
at(state).emplace_back(Epsilon, end);
}
return { begin, end };
}

public:
Nfa(const string& pat): pat(pat) {
parseAlt();
}
};

int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
string pat, str;
cin >> pat >> str;

Nfa nfa { pat };
int nSz = nfa.size();
int m = str.length();
auto dis = vector<vector<int>>(nSz, vector<int>(m + 1, inf));
deque<PII> dq;
dis[0][0] = 0;
dq.emplace_back(0, 0);

while (!dq.empty()) {
auto cur = dq.front();
int cdis = dis[cur.first][cur.second];
dq.pop_front();
auto relaxFront = [&](int u, int l) {
if (dis[u][l] > cdis) {
dis[u][l] = cdis;
dq.emplace_front(u, l);
}
};
auto relaxBack = [&](int u, int l) {
if (dis[u][l] > cdis + 1) {
dis[u][l] = cdis + 1;
dq.emplace_back(u, l);
}
};
if (cur.second < m) {
// delete
relaxBack(cur.first, cur.second + 1);
}
for (auto& [ch, next]: nfa[cur.first]) {
if (ch == Epsilon) {
relaxFront(next, cur.second);
} else {
if (cur.second < m) {
if (ch == str[cur.second]) {
// match
relaxFront(next, cur.second + 1);
} else {
// change
relaxBack(next, cur.second + 1);
}
}
// insert
relaxBack(next, cur.second);
}
}
}
printf("%d\n", dis[nSz - 1][m]);
return 0;
}