December 2005 Problem 'cpattern' Analysis

by Bruce Merry

We can modify the Morris-Pratt string searching algorithm to solve the problem as if it were just string matching (if you're not familiar with the Morris-Pratt algorithm - which is almost identical to the Knuth-Morris-Pratt algorithm - go look it up on the web before reading further).

For example, suppose the pattern is 1 2 3 1. If we see cows 4 7 8, we can match this to the prefix (1 2 3). If the next cow is 10, we apply the failure function and fall back to (1 2). Note that unlike in plain M-P, this prefix is not a suffix of the original. However, it has the same structure (in terms of relative values) as the original, so we can use it. Our failure functions are thus more complicated - they have to specify a remapping of the indices.

During matching, a mapping of pattern indices to spot counts is kept (with a magic -1 value for unseen indices). When the next cow scanned: 1. The new spot count is tested aganist the map for validity, i.e., if it has been seen before it must match, otherwise it must be in a legal range. If it is valid, it is appended to the current match. 2. Otherwise, the failure function is applied to shorten the current match, and the remapping is applied to the map to juggle the indices. Then step 1 is tried again.

Construction of the failure function is done similarly. The only trick is to compose the remapping functions when multiple failures occur.

Maintainence of the remapping functions and spot count maps introduces an extra factor of S, so the running time is O((K + N)S).

Here is Bruce's solution:

#include <fstream>
#include <algorithm>
#include <vector>
#include <cstdlib>
#include <iterator>

using namespace std;

#define MAXS 25

struct transition {
    int length;
    int remap[25];
};

struct state {
    int length;
    int values[25];
};

int N, K;
static vector<int> cows;
static vector<int> pattern;
static vector<transition> skip;

static void readin() {
    int S;

    ifstream in("cpattern.in");
    in >> N >> K >> S;
    cows.resize(N);
    pattern.resize(K);
    skip.resize(K + 1);
    for (int i = 0; i < N; i++)
        in >> cows[i];
    for (int i = 0; i < K; i++)
    {
        in >> pattern[i];
        pattern[i]--;
    }
}

static bool in_range(const state &s, int index, int value) {
    if (s.values[index] != -1)
        return value == s.values[index];
    else {
        int l = index;
        while (l >= 0 && s.values[l] == -1) l--;
        if (l >= 0 && s.values[l] >= value) return false;

        int h = index;
        while (h < MAXS && s.values[h] == -1) h++;
        if (h < MAXS && s.values[h] <= value) return false;

        return true;
    }
}

static state step_down(const state &s, const transition &t) {
    state ans;

    ans.length = t.length;
    for (int i = 0; i < MAXS; i++)
        if (t.remap[i] == -1) ans.values[i] = -1;
        else ans.values[i] = s.values[t.remap[i]];
    return ans;
}

// Applies a then b
static transition compose(const transition &a, const transition &b) {
    transition ans;

    ans.length = b.length;
    for (int i = 0; i < MAXS; i++)
        if (b.remap[i] == -1) ans.remap[i] = -1;
        else ans.remap[i] = a.remap[b.remap[i]];
    return ans;
}

static void generate() {
    state top, cur;

    skip[0].length = -1;
    fill(skip[0].remap, skip[0].remap + MAXS, -1);
    top.length = 0;
    fill(top.values, top.values + MAXS, -1);

    for (int i = 1; i <= K; i++) {
        cur = step_down(top, skip[i - 1]);
        skip[i] = skip[i - 1];
        while (cur.length >= 0) {
            if (in_range(cur, pattern[cur.length], pattern[i - 1])) {
                skip[i].remap[pattern[cur.length]] = pattern[i - 1];
                break;
            } else {
                skip[i] = compose(skip[i], skip[cur.length]);
                cur = step_down(cur, skip[cur.length]);
            }
        }
        skip[i].length++;

        top.values[pattern[i - 1]] = pattern[i - 1];
        top.length++;
    }
}

static void match() {
    vector<int> matches;

    state cur;
    cur.length = 0;
    fill(cur.values, cur.values + MAXS, -1);

    for (int i = 0; i < N; i++) {
        while (cur.length == K || !in_range(cur, pattern[cur.length], cows[i]))
            cur = step_down(cur, skip[cur.length]);
        cur.values[pattern[cur.length]] = cows[i];
        cur.length++;
        if (cur.length == K) matches.push_back(i - K + 2);
    }

    ofstream out("cpattern.out");
    out << matches.size() << "\n";
    copy(matches.begin(), matches.end(), ostream_iterator<int>(out, "\n"));
}

int main() {
    readin();
    generate();
    match();
    return 0;
}