算法模版-字符串

字符串

KMP

结论:\(s\) 有长度为 \(r\) 的 border,则 \(\mid s\mid - r\)\(s\) 的周期。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
std::vector<int> kmp(std::string &s) {
int n = s.size() - 1;
std::vector<int> fail(n + 1);
fail[0] = fail[1] = 0;
for (int i = 2, j = 0; i <= n; i++) {
while (j && s[j + 1] != s[i]) {
j = fail[j];
}
if (s[j + 1] == s[i]) {
j++;
}
fail[i] = j;
}
return fail;
}

int main() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);

// 求出 s 在 t 中出现的所有位置,以及 s 的 fail 数组
std::string s, t;
std::cin >> t >> s;
auto p = " " + s + "#" + t;
auto fail = kmp(p);
for (int i = s.size() + 2; i <= p.size() - 1; i++) {
if (fail[i] == s.size()) {
std::cout << i - s.size() * 2 << "\n";
}
}
p = " " + s;
fail = kmp(p);
for (int i = 1; i <= s.size(); i++) {
std::cout << fail[i] << " \n"[i == s.size()];
}

return 0;
}

最小表示法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// 输入的 s 为 0-index,返回位置为 1-index
int min_rep(std::string s) {
int n = s.size();
s = " " + s + s;
int i = 1, j = 2, k = 0;
while (i <= n && j <= n) {
while (k < n && s[i + k] == s[j + k]) {
k++;
}
if (s[i + k] > s[j + k]) {
i = i + k + 1;
} else {
j = j + k + 1;
}
if (i == j) {
j++;
}
k = 0;
}
return std::min(i, j);
}

随机生成模底双字符串哈希

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <bits/stdc++.h>

using i64 = long long;
using u64 = unsigned long long;
using u32 = unsigned;
using u128 = unsigned __int128;

bool is_prime(int n) {
if (n <= 1) {
return false;
}
for (int i = 2; i * i <= n; i++) {
if (n % i == 0) {
return false;
}
}
return true;
}

int findPrime(int n) {
while (!is_prime(n)) {
n++;
}
return n;
}

using Hash = std::array<int, 2>;

std::mt19937 rng(
std::chrono::steady_clock::now().time_since_epoch().count());
const int P1 = findPrime(rng() % 900000000 + 100000000);
const int base1 = findPrime(rng() % 10000 + 10000);
const int P2 = findPrime(rng() % 900000000 + 100000000);
const int base2 = findPrime(rng() % 10000 + 10000);

int main() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);


auto hash = [&](const std::string &s) -> Hash {
int x1 = 0, x2 = 0;
for (int i = 0; i < s.size(); i++) {
x1 = (1ll * x1 * base1 + s[i]) % P1;
x2 = (1ll * x2 * base2 + s[i]) % P2;
}
return {x1, x2};
};

// 以下部分是快速求子串哈希
int n = s.size();
s = " " + s;
std::vector<int> p1(n + 1), p2(n + 1);
p1[0] = p2[0] = 1;
for (int i = 1; i <= n; i++) {
p1[i] = p1[i - 1] * base1 % P1;
p2[i] = p2[i - 1] * base2 % P2;
}

std::vector<int> h1(n + 1), h2(n + 1);

for (int i = 1; i <= n; i++) {
h1[i] = (1ll * base1 * h1[i - 1] + s[i]) % P1;
h2[i] = (1ll * base2 * h2[i - 1] + s[i]) % P2;
}

// 注意,这里求的子串哈希,对应长度是 [1, r - l + 1]
// 如果想要正好对应 [l, r],需要分别乘上 p1/p2[n - r]
auto get_hash = [&](int l, int r) -> Hash {
int x1 = (h1[r] - 1ll * h1[l - 1] * p1[r - l + 1] % P1 + P1) % P1;
int x2 = (h2[r] - 1ll * h2[l - 1] * p2[r - l + 1] % P2 + P2) % P2;
return {x1, x2};
};

// 例:不同字符串的个数
int n;
std::cin >> n;
std::set<Hash> st;


for (int i = 1; i <= n; i++) {
std::string s;
std::cin >> s;
Hash h = hash(s);
st.insert(h);
}

std::cout << st.size() << "\n";



return 0;
}

Trie

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
struct Trie {
static constexpr int ALPHABET = 26;
struct Node {
int len;
int cnt;
std::array<int, ALPHABET> next;
Node() : len{0}, cnt{0}, next{} {}
};
std::vector<Node> t;
Trie() { init(); }
void init() {
t.assign(2, Node());
t[0].next.fill(1);
t[0].len = -1;
t[0].cnt = -1;
}
int newNode() {
t.emplace_back();
return t.size() - 1;
}
int add(const std::string &a) {
int p = 1;
for (auto c : a) {
int x = c - 'a';
if (t[p].next[x] == 0) {
t[p].next[x] = newNode();
t[t[p].next[x]].len = t[p].len + 1;
}
p = t[p].next[x];
t[p].cnt++;
}
return p;
}

// 统计 \sum LCP
int work(const std::string &a) {
int p = 1;
int cnt = 0;
for (auto c : a) {
int x = c - 'a';
if (t[p].next[x] == 0) {
return cnt;
}
p = t[p].next[x];
cnt += t[p].cnt;
}
return cnt;
}
};

AC 自动机

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <bits/stdc++.h>

using i64 = long long;
using u64 = unsigned long long;
using u32 = unsigned;
using u128 = unsigned __int128;

struct AhoCorasick {
static constexpr int ALPHABET = 26;
struct Node {
int len;
int link;
std::array<int, ALPHABET> next;
Node() : len{0}, link{0}, next{} {}
};
std::vector<Node> t;
AhoCorasick() { init(); }
void init() {
t.assign(2, Node());
t[0].next.fill(1);
t[0].len = -1;
}
int newNode() {
t.emplace_back();
return t.size() - 1;
}
int add(const std::string &a) {
int p = 1;
for (auto c : a) {
int x = c - 'a';
if (t[p].next[x] == 0) {
t[p].next[x] = newNode();
t[t[p].next[x]].len = t[p].len + 1;
}
p = t[p].next[x];
}
return p;
}
void work() {
std::queue<int> q;
q.push(1);
while (!q.empty()) {
int x = q.front();
q.pop();

for (int i = 0; i < ALPHABET; i++) {
if (t[x].next[i] == 0) {
t[x].next[i] = t[t[x].link].next[i];
} else {
t[t[x].next[i]].link = t[t[x].link].next[i];
q.push(t[x].next[i]);
}
}
}
}
int next(int p, int x) { return t[p].next[x]; }
int link(int p) { return t[p].link; }
int len(int p) { return t[p].len; }
int size() { return t.size(); }
};

int main() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);

int n;
std::cin >> n;
AhoCorasick ac;
std::vector<std::string> t(n);
std::vector<int> end(n);
for (int i = 0; i < n; i++) {
std::cin >> t[i];
end[i] = ac.add(t[i]);
}
ac.work();
std::string s;
std::cin >> s;
int p = 1;
std::vector<int> f(ac.size());
for (auto c : s) {
p = ac.next(p, c - 'a');
f[p]++;
}
std::vector<std::vector<int>> adj(ac.size());
for (int i = 2; i < ac.size(); i++) {
adj[ac.link(i)].push_back(i);
}
auto dfs = [&](auto dfs, int x) -> void {
for (auto y : adj[x]) {
dfs(dfs, y);
f[x] += f[y];
}
};
dfs(dfs, 1);
for (int i = 0; i < n; i++) {
std::cout << f[end[i]] << "\n";
}

return 0;
}

Z 函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
std::vector<int> Z(std::string &s) {
int n = s.size();
std::vector<int> z(n + 1);
for (int i = 1, j = 1; i < n; i++) {
z[i] = std::max(0, std::min(j + z[j] - i, z[i - j]));
while (i + z[i] < n && s[z[i]] == s[i + z[i]]) {
z[i]++;
}
if (i + z[i] > j + z[j]) {
j = i;
}
}
return z;
}

Manacher

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
std::vector<int> manacher(std::string &s) {
std::string t = "#";
for (auto c : s) {
t += c;
t += '#';
}
int n = t.size();
std::vector<int> r(n);
for (int i = 0, j = 0; i < n; i++) {
if (2 * j - i >= 0 && j + r[j] > i) {
r[i] = std::min(r[2 * j - i], j + r[j] - i);
}
while (i - r[i] >= 0 && i + r[i] < n && t[i - r[i]] == t[i + r[i]]) {
r[i] += 1;
}
if (i + r[i] > j + r[j]) {
j = i;
}
}
return r;
}

序列自动机

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
std::string s;
std::cin >> s;
int n = s.size();
s = " " + s;
std::vector<int> tree(26, -1);
std::vector<std::vector<int>> ne(n + 1, std::vector<int>(26, -1));

for (int i = n; i >= 1; i--) {
for (int j = 0; j < 26; j++) {
ne[i][j] = tree[j];
}
tree[s[i] - 'a'] = i;
}
// 0-index
auto check = [&](const std::string &s) {
int start = tree[s[0] - 'a'];
if (start == -1) {
return false;
}
for (int i = 1; i < s.size(); i++) {
start = ne[start][s[i] - 'a'];
if (start == -1) {
return false;
}
}
return true;
};

后缀自动机

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include <bits/stdc++.h>

using i64 = long long;
using u64 = unsigned long long;
using u32 = unsigned;
using u128 = unsigned __int128;

struct SAM {
static constexpr int ALPHABET_SIZE = 26;
struct Node {
int len;
int link;
std::array<int, ALPHABET_SIZE> next;
Node() : len{}, link{}, next{} {}
};
std::vector<Node> t;
SAM() { init(); }
void init() {
t.assign(2, Node());
t[0].next.fill(1);
t[0].len = -1;
}
int newNode() {
t.emplace_back();
return t.size() - 1;
}
int extend(int p, int c) {
if (t[p].next[c]) {
int q = t[p].next[c];
if (t[q].len == t[p].len + 1) {
return q;
}
int r = newNode();
t[r].len = t[p].len + 1;
t[r].link = t[q].link;
t[r].next = t[q].next;
t[q].link = r;
while (t[p].next[c] == q) {
t[p].next[c] = r;
p = t[p].link;
}
return r;
}
int cur = newNode();
t[cur].len = t[p].len + 1;
while (!t[p].next[c]) {
t[p].next[c] = cur;
p = t[p].link;
}
t[cur].link = extend(p, c);
return cur;
}
int extend(int p, char c, char offset = 'a') {
return extend(p, c - offset);
}
int next(int p, int x) { return t[p].next[x]; }
int next(int p, char c, char offset = 'a') { return next(p, c - offset); }
int link(int p) { return t[p].link; }
int len(int p) { return t[p].len; }
int size() { return t.size(); }
};

int main() {
std::ios::sync_with_stdio(false);
std::cin.tie(nullptr);

std::string s;
std::cin >> s;
SAM sam;
std::vector<int> x;
int p = 1;
for (int i = 0; i < s.size(); i++) {
x.push_back(p = sam.extend(p, s[i] - 'a'));
}
std::vector<int> f(sam.size());
for (auto v : x) {
f[v]++;
}
std::vector<std::vector<int>> adj(sam.size());
for (int i = 2; i < sam.size(); i++) {
adj[sam.link(i)].push_back(i);
}
i64 ans = 0;
auto dfs = [&](auto dfs, int x) -> void {
for (auto y : adj[x]) {
dfs(dfs, y);
f[x] += f[y];
}
if (f[x] > 1) {
ans = std::max(ans, 1ll * f[x] * sam.len(x));
}
};
dfs(dfs, 1);
std::cout << ans << "\n";

return 0;
}