Need Faster Word Builder Algorithm

I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.

    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {

    if (term.trim() == "")

        return null;

    // only difference between this and anagram is changing the length filter

    char a = term.toCharArray(); // anagram



    int first = new int[26]; // letter count of anagram

    int c; // array position

    int blankcount = 0;



    // initialize word to anagram

    for (c = 0; c < a.length; c++) {

        if (a[c] == '?') {

            blankcount++;

            continue;

        }

        first[a[c] - 'A']++;

    }



// gets pool of words to search through

    String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());

    Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +

            "InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +

            "FROM     `" + LexData.getLexName() + "` n" +

            "WHERE (" + lenFilter +

            filters +

            " ) " + ordering, null);



// creates new cursor to add valid words to

    MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",

            "Anagrams", "ProbFactor", "OPlayFactor", "Score"});



// THIS NEEDS TO BE FASTER

    while (cursor.moveToNext()) {

        String word = cursor.getString(1);

        char b = word.toCharArray();

        if (isAnagram(first, b, blankcount)) {

            matrixCursor.addRow(get_CursorRow(cursor));

        }

    }

    cursor.close();

    return matrixCursor;

}





private boolean isAnagram(int anagram, char word, int blankcount) {

    int matchcount = blankcount;

    int c; // each letter

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

    }

asked Nov 22 '18 at 16:05

Dana Bell

246

Try doing all the "non UI" work on a separate thread, if you're not doing it already.

– HB.
Nov 22 '18 at 18:11

This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.

– Dana Bell
Nov 22 '18 at 22:54

add a comment |

    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {

    if (term.trim() == "")

        return null;

    // only difference between this and anagram is changing the length filter

    char a = term.toCharArray(); // anagram



    int first = new int[26]; // letter count of anagram

    int c; // array position

    int blankcount = 0;



    // initialize word to anagram

    for (c = 0; c < a.length; c++) {

        if (a[c] == '?') {

            blankcount++;

            continue;

        }

        first[a[c] - 'A']++;

    }



// gets pool of words to search through

    String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());

    Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +

            "InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +

            "FROM     `" + LexData.getLexName() + "` n" +

            "WHERE (" + lenFilter +

            filters +

            " ) " + ordering, null);



// creates new cursor to add valid words to

    MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",

            "Anagrams", "ProbFactor", "OPlayFactor", "Score"});



// THIS NEEDS TO BE FASTER

    while (cursor.moveToNext()) {

        String word = cursor.getString(1);

        char b = word.toCharArray();

        if (isAnagram(first, b, blankcount)) {

            matrixCursor.addRow(get_CursorRow(cursor));

        }

    }

    cursor.close();

    return matrixCursor;

}





private boolean isAnagram(int anagram, char word, int blankcount) {

    int matchcount = blankcount;

    int c; // each letter

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

    }

asked Nov 22 '18 at 16:05

Dana Bell

246

Try doing all the "non UI" work on a separate thread, if you're not doing it already.

– HB.
Nov 22 '18 at 18:11

This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.

– Dana Bell
Nov 22 '18 at 22:54

add a comment |

    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {

    if (term.trim() == "")

        return null;

    // only difference between this and anagram is changing the length filter

    char a = term.toCharArray(); // anagram



    int first = new int[26]; // letter count of anagram

    int c; // array position

    int blankcount = 0;



    // initialize word to anagram

    for (c = 0; c < a.length; c++) {

        if (a[c] == '?') {

            blankcount++;

            continue;

        }

        first[a[c] - 'A']++;

    }



// gets pool of words to search through

    String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());

    Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +

            "InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +

            "FROM     `" + LexData.getLexName() + "` n" +

            "WHERE (" + lenFilter +

            filters +

            " ) " + ordering, null);



// creates new cursor to add valid words to

    MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",

            "Anagrams", "ProbFactor", "OPlayFactor", "Score"});



// THIS NEEDS TO BE FASTER

    while (cursor.moveToNext()) {

        String word = cursor.getString(1);

        char b = word.toCharArray();

        if (isAnagram(first, b, blankcount)) {

            matrixCursor.addRow(get_CursorRow(cursor));

        }

    }

    cursor.close();

    return matrixCursor;

}





private boolean isAnagram(int anagram, char word, int blankcount) {

    int matchcount = blankcount;

    int c; // each letter

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

    }

asked Nov 22 '18 at 16:05

Dana Bell

246

    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {

    if (term.trim() == "")

        return null;

    // only difference between this and anagram is changing the length filter

    char a = term.toCharArray(); // anagram



    int first = new int[26]; // letter count of anagram

    int c; // array position

    int blankcount = 0;



    // initialize word to anagram

    for (c = 0; c < a.length; c++) {

        if (a[c] == '?') {

            blankcount++;

            continue;

        }

        first[a[c] - 'A']++;

    }



// gets pool of words to search through

    String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());

    Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +

            "InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +

            "FROM     `" + LexData.getLexName() + "` n" +

            "WHERE (" + lenFilter +

            filters +

            " ) " + ordering, null);



// creates new cursor to add valid words to

    MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",

            "Anagrams", "ProbFactor", "OPlayFactor", "Score"});



// THIS NEEDS TO BE FASTER

    while (cursor.moveToNext()) {

        String word = cursor.getString(1);

        char b = word.toCharArray();

        if (isAnagram(first, b, blankcount)) {

            matrixCursor.addRow(get_CursorRow(cursor));

        }

    }

    cursor.close();

    return matrixCursor;

}





private boolean isAnagram(int anagram, char word, int blankcount) {

    int matchcount = blankcount;

    int c; // each letter

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

    }

java android algorithm simplecursoradapter

asked Nov 22 '18 at 16:05

Dana Bell

246

asked Nov 22 '18 at 16:05

Dana Bell

246

asked Nov 22 '18 at 16:05

Dana Bell

246

asked Nov 22 '18 at 16:05

Dana Bell

246

asked Nov 22 '18 at 16:05

Dana Bell

246

Try doing all the "non UI" work on a separate thread, if you're not doing it already.

– HB.
Nov 22 '18 at 18:11

This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.

– Dana Bell
Nov 22 '18 at 22:54

add a comment |

Try doing all the "non UI" work on a separate thread, if you're not doing it already.

– HB.
Nov 22 '18 at 18:11

This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.

– Dana Bell
Nov 22 '18 at 22:54

Try doing all the "non UI" work on a separate thread, if you're not doing it already.

– HB.
Nov 22 '18 at 18:11

This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.

– Dana Bell
Nov 22 '18 at 22:54

add a comment |

3 Answers
3

active

oldest

votes

Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word out of anagram then you can avoid the expensive test.

One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word that are not in anagram is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.

You can precompute the bitmasks like this:

private int letterMask(char word)

{

    int c, mask = 0;

    for (c = 0; c < word.length; c++)

        mask |= (1 << (word[c] - 'A'));

    return mask;

}

Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term and store in termMask. Then inside your cursor loop you can do a test like this:

// compute mask of bits in mask that are not in term:

int missingLettersMask = cursor.getInt(8) & ~termMask;

if(missingLettersMask != 0)

{

    // check if we could possibly make up for these letters using blanks:

    int remainingBlanks = blankcount;

    while((remainingBlanks-- > 0) && (missingLettersMask != 0))

        missingLettersMask &= (missingLettersMask - 1); // remove one bit



    if(missingLettersMask != 0)

        continue; // move onto the next word

}



// word can potentially be made from anagram, call isAnagram:

edited Nov 23 '18 at 8:34

answered Nov 23 '18 at 0:29

samgak

19k33161

Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.

– Dana Bell
Nov 24 '18 at 16:22

Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.

– Dana Bell
Nov 24 '18 at 16:29

That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.

– samgak
Nov 25 '18 at 2:18

Do you get much of a speed up by removing the ordering clause?

– samgak
Nov 26 '18 at 1:32

The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.

– Dana Bell
Nov 26 '18 at 17:18

add a comment |

There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.

You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:

Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

Insert the sorted word into the trie.

Where you would place the end-marker in a normal trie, you place a list of possible words.

If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:

Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

Partial anagrams can be found by ignoring the next letter without descending in the trie.

Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.

When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.

You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.

A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:

function find_r(t, str, blanks, visited)

{

    // don't revisit explored paths

    key = make_key(t, str, blanks);



    if (key in visited) return ;

    visited ~= key;



    if (str.length == 0 and blanks == 0) {   

        // all resources have been used: return list of anagrams        

        return t.word;

    } else {

        res = ;

        c = 0;



        if (str.length > 0) {

            c = str[0];



            // regular traversal: use current letter and descend

            if (c in t.next) {

                res ~= find_r(t.next[c], str[1:], blanks, visited);

            }



            # partial anagrams: skip current letter and don't descend

            l = 1

            while (l < str.length and str[l] == c) l++;



            res ~= find_r(t, str[l:], blanks, visited);

        }



        if (blanks > 0) {

            // blanks: decrease blanks and descend

            for (i in t.next) {

                if (i < c) {

                    res ~= find_r(t.next[i], str, blanks - 1, visited);

                }

            }

        }



        return res;

    }

}

(Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)

Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.

Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.

edited Nov 24 '18 at 16:25

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.

– Dana Bell
Nov 24 '18 at 17:15

add a comment |

I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.

This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.

    private boolean isAnagram(int anagram, char word, int blankcount) {

    // anagram is letters in term

    int matchcount = blankcount;

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);             

        if (mismatchcount > blankcount)

            return false;

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

}

EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.

    private boolean isAnagram(int anagram, char word, int blankcount) {

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};

    int letter = 0;



    for (c = 0; c < word.length; c++) {

        letter = ++second[word[c] - 'A'];

        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);

        if (mismatchcount > blankcount)

            return false;

    }

    return true;

}

edited Dec 8 '18 at 15:50

answered Dec 7 '18 at 17:17

Dana Bell

246

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53434695%2fneed-faster-word-builder-algorithm%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

3 Answers
3

active

oldest

votes

3 Answers
3

active

oldest

votes

You can precompute the bitmasks like this:

private int letterMask(char word)

{

    int c, mask = 0;

    for (c = 0; c < word.length; c++)

        mask |= (1 << (word[c] - 'A'));

    return mask;

}

// compute mask of bits in mask that are not in term:

int missingLettersMask = cursor.getInt(8) & ~termMask;

if(missingLettersMask != 0)

{

    // check if we could possibly make up for these letters using blanks:

    int remainingBlanks = blankcount;

    while((remainingBlanks-- > 0) && (missingLettersMask != 0))

        missingLettersMask &= (missingLettersMask - 1); // remove one bit



    if(missingLettersMask != 0)

        continue; // move onto the next word

}



// word can potentially be made from anagram, call isAnagram:

edited Nov 23 '18 at 8:34

answered Nov 23 '18 at 0:29

samgak

19k33161

Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.

– Dana Bell
Nov 24 '18 at 16:22

Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.

– Dana Bell
Nov 24 '18 at 16:29

That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.

– samgak
Nov 25 '18 at 2:18

Do you get much of a speed up by removing the ordering clause?

– samgak
Nov 26 '18 at 1:32

The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.

– Dana Bell
Nov 26 '18 at 17:18

add a comment |

You can precompute the bitmasks like this:

private int letterMask(char word)

{

    int c, mask = 0;

    for (c = 0; c < word.length; c++)

        mask |= (1 << (word[c] - 'A'));

    return mask;

}

// compute mask of bits in mask that are not in term:

int missingLettersMask = cursor.getInt(8) & ~termMask;

if(missingLettersMask != 0)

{

    // check if we could possibly make up for these letters using blanks:

    int remainingBlanks = blankcount;

    while((remainingBlanks-- > 0) && (missingLettersMask != 0))

        missingLettersMask &= (missingLettersMask - 1); // remove one bit



    if(missingLettersMask != 0)

        continue; // move onto the next word

}



// word can potentially be made from anagram, call isAnagram:

edited Nov 23 '18 at 8:34

answered Nov 23 '18 at 0:29

samgak

19k33161

Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.

– Dana Bell
Nov 24 '18 at 16:22

Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.

– Dana Bell
Nov 24 '18 at 16:29

That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.

– samgak
Nov 25 '18 at 2:18

Do you get much of a speed up by removing the ordering clause?

– samgak
Nov 26 '18 at 1:32

The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.

– Dana Bell
Nov 26 '18 at 17:18

add a comment |

You can precompute the bitmasks like this:

private int letterMask(char word)

{

    int c, mask = 0;

    for (c = 0; c < word.length; c++)

        mask |= (1 << (word[c] - 'A'));

    return mask;

}

// compute mask of bits in mask that are not in term:

int missingLettersMask = cursor.getInt(8) & ~termMask;

if(missingLettersMask != 0)

{

    // check if we could possibly make up for these letters using blanks:

    int remainingBlanks = blankcount;

    while((remainingBlanks-- > 0) && (missingLettersMask != 0))

        missingLettersMask &= (missingLettersMask - 1); // remove one bit



    if(missingLettersMask != 0)

        continue; // move onto the next word

}



// word can potentially be made from anagram, call isAnagram:

edited Nov 23 '18 at 8:34

answered Nov 23 '18 at 0:29

samgak

19k33161

You can precompute the bitmasks like this:

private int letterMask(char word)

{

    int c, mask = 0;

    for (c = 0; c < word.length; c++)

        mask |= (1 << (word[c] - 'A'));

    return mask;

}

// compute mask of bits in mask that are not in term:

int missingLettersMask = cursor.getInt(8) & ~termMask;

if(missingLettersMask != 0)

{

    // check if we could possibly make up for these letters using blanks:

    int remainingBlanks = blankcount;

    while((remainingBlanks-- > 0) && (missingLettersMask != 0))

        missingLettersMask &= (missingLettersMask - 1); // remove one bit



    if(missingLettersMask != 0)

        continue; // move onto the next word

}



// word can potentially be made from anagram, call isAnagram:

edited Nov 23 '18 at 8:34

answered Nov 23 '18 at 0:29

samgak

19k33161

edited Nov 23 '18 at 8:34

answered Nov 23 '18 at 0:29

samgak

19k33161

answered Nov 23 '18 at 0:29

samgak

19k33161

answered Nov 23 '18 at 0:29

samgak

19k33161

Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.

– Dana Bell
Nov 24 '18 at 16:22

Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.

– Dana Bell
Nov 24 '18 at 16:29

That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.

– samgak
Nov 25 '18 at 2:18

Do you get much of a speed up by removing the ordering clause?

– samgak
Nov 26 '18 at 1:32

The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.

– Dana Bell
Nov 26 '18 at 17:18

add a comment |

Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.

– Dana Bell
Nov 24 '18 at 16:22

Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.

– Dana Bell
Nov 24 '18 at 16:29

That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.

– samgak
Nov 25 '18 at 2:18

Do you get much of a speed up by removing the ordering clause?

– samgak
Nov 26 '18 at 1:32

The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.

– Dana Bell
Nov 26 '18 at 17:18

Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.

– Dana Bell
Nov 24 '18 at 16:22

Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.

– Dana Bell
Nov 24 '18 at 16:29

That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.

– samgak
Nov 25 '18 at 2:18

Do you get much of a speed up by removing the ordering clause?

– samgak
Nov 26 '18 at 1:32

The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.

– Dana Bell
Nov 26 '18 at 17:18

add a comment |

You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:

Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

Insert the sorted word into the trie.

Where you would place the end-marker in a normal trie, you place a list of possible words.

Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

Partial anagrams can be found by ignoring the next letter without descending in the trie.

Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.

function find_r(t, str, blanks, visited)

{

    // don't revisit explored paths

    key = make_key(t, str, blanks);



    if (key in visited) return ;

    visited ~= key;



    if (str.length == 0 and blanks == 0) {   

        // all resources have been used: return list of anagrams        

        return t.word;

    } else {

        res = ;

        c = 0;



        if (str.length > 0) {

            c = str[0];



            // regular traversal: use current letter and descend

            if (c in t.next) {

                res ~= find_r(t.next[c], str[1:], blanks, visited);

            }



            # partial anagrams: skip current letter and don't descend

            l = 1

            while (l < str.length and str[l] == c) l++;



            res ~= find_r(t, str[l:], blanks, visited);

        }



        if (blanks > 0) {

            // blanks: decrease blanks and descend

            for (i in t.next) {

                if (i < c) {

                    res ~= find_r(t.next[i], str, blanks - 1, visited);

                }

            }

        }



        return res;

    }

}

(Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)

edited Nov 24 '18 at 16:25

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.

– Dana Bell
Nov 24 '18 at 17:15

add a comment |

You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:

Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

Insert the sorted word into the trie.

Where you would place the end-marker in a normal trie, you place a list of possible words.

Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

Partial anagrams can be found by ignoring the next letter without descending in the trie.

Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.

function find_r(t, str, blanks, visited)

{

    // don't revisit explored paths

    key = make_key(t, str, blanks);



    if (key in visited) return ;

    visited ~= key;



    if (str.length == 0 and blanks == 0) {   

        // all resources have been used: return list of anagrams        

        return t.word;

    } else {

        res = ;

        c = 0;



        if (str.length > 0) {

            c = str[0];



            // regular traversal: use current letter and descend

            if (c in t.next) {

                res ~= find_r(t.next[c], str[1:], blanks, visited);

            }



            # partial anagrams: skip current letter and don't descend

            l = 1

            while (l < str.length and str[l] == c) l++;



            res ~= find_r(t, str[l:], blanks, visited);

        }



        if (blanks > 0) {

            // blanks: decrease blanks and descend

            for (i in t.next) {

                if (i < c) {

                    res ~= find_r(t.next[i], str, blanks - 1, visited);

                }

            }

        }



        return res;

    }

}

(Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)

edited Nov 24 '18 at 16:25

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.

– Dana Bell
Nov 24 '18 at 17:15

add a comment |

You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:

Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

Insert the sorted word into the trie.

Where you would place the end-marker in a normal trie, you place a list of possible words.

Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

Partial anagrams can be found by ignoring the next letter without descending in the trie.

Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.

function find_r(t, str, blanks, visited)

{

    // don't revisit explored paths

    key = make_key(t, str, blanks);



    if (key in visited) return ;

    visited ~= key;



    if (str.length == 0 and blanks == 0) {   

        // all resources have been used: return list of anagrams        

        return t.word;

    } else {

        res = ;

        c = 0;



        if (str.length > 0) {

            c = str[0];



            // regular traversal: use current letter and descend

            if (c in t.next) {

                res ~= find_r(t.next[c], str[1:], blanks, visited);

            }



            # partial anagrams: skip current letter and don't descend

            l = 1

            while (l < str.length and str[l] == c) l++;



            res ~= find_r(t, str[l:], blanks, visited);

        }



        if (blanks > 0) {

            // blanks: decrease blanks and descend

            for (i in t.next) {

                if (i < c) {

                    res ~= find_r(t.next[i], str, blanks - 1, visited);

                }

            }

        }



        return res;

    }

}

(Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)

edited Nov 24 '18 at 16:25

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:

Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

Insert the sorted word into the trie.

Where you would place the end-marker in a normal trie, you place a list of possible words.

Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

Partial anagrams can be found by ignoring the next letter without descending in the trie.

Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.

function find_r(t, str, blanks, visited)

{

    // don't revisit explored paths

    key = make_key(t, str, blanks);



    if (key in visited) return ;

    visited ~= key;



    if (str.length == 0 and blanks == 0) {   

        // all resources have been used: return list of anagrams        

        return t.word;

    } else {

        res = ;

        c = 0;



        if (str.length > 0) {

            c = str[0];



            // regular traversal: use current letter and descend

            if (c in t.next) {

                res ~= find_r(t.next[c], str[1:], blanks, visited);

            }



            # partial anagrams: skip current letter and don't descend

            l = 1

            while (l < str.length and str[l] == c) l++;



            res ~= find_r(t, str[l:], blanks, visited);

        }



        if (blanks > 0) {

            // blanks: decrease blanks and descend

            for (i in t.next) {

                if (i < c) {

                    res ~= find_r(t.next[i], str, blanks - 1, visited);

                }

            }

        }



        return res;

    }

}

(Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)

edited Nov 24 '18 at 16:25

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

edited Nov 24 '18 at 16:25

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

answered Nov 24 '18 at 8:53

M Oehm

21.4k31832

Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.

– Dana Bell
Nov 24 '18 at 17:15

add a comment |

Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.

– Dana Bell
Nov 24 '18 at 17:15

Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.

– Dana Bell
Nov 24 '18 at 17:15

add a comment |

    private boolean isAnagram(int anagram, char word, int blankcount) {

    // anagram is letters in term

    int matchcount = blankcount;

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);             

        if (mismatchcount > blankcount)

            return false;

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

}

EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.

    private boolean isAnagram(int anagram, char word, int blankcount) {

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};

    int letter = 0;



    for (c = 0; c < word.length; c++) {

        letter = ++second[word[c] - 'A'];

        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);

        if (mismatchcount > blankcount)

            return false;

    }

    return true;

}

edited Dec 8 '18 at 15:50

answered Dec 7 '18 at 17:17

Dana Bell

246

add a comment |

    private boolean isAnagram(int anagram, char word, int blankcount) {

    // anagram is letters in term

    int matchcount = blankcount;

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);             

        if (mismatchcount > blankcount)

            return false;

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

}

EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.

    private boolean isAnagram(int anagram, char word, int blankcount) {

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};

    int letter = 0;



    for (c = 0; c < word.length; c++) {

        letter = ++second[word[c] - 'A'];

        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);

        if (mismatchcount > blankcount)

            return false;

    }

    return true;

}

edited Dec 8 '18 at 15:50

answered Dec 7 '18 at 17:17

Dana Bell

246

add a comment |

    private boolean isAnagram(int anagram, char word, int blankcount) {

    // anagram is letters in term

    int matchcount = blankcount;

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);             

        if (mismatchcount > blankcount)

            return false;

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

}

EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.

    private boolean isAnagram(int anagram, char word, int blankcount) {

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};

    int letter = 0;



    for (c = 0; c < word.length; c++) {

        letter = ++second[word[c] - 'A'];

        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);

        if (mismatchcount > blankcount)

            return false;

    }

    return true;

}

edited Dec 8 '18 at 15:50

answered Dec 7 '18 at 17:17

Dana Bell

246

    private boolean isAnagram(int anagram, char word, int blankcount) {

    // anagram is letters in term

    int matchcount = blankcount;

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};



    for (c = 0; c < word.length; c++)

        second[word[c] - 'A']++;



    for (c = 0; c < 26; c++)

    {

        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);             

        if (mismatchcount > blankcount)

            return false;

        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];

    }



    if (matchcount == word.length)

        return true;

    return false;

}

EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.

    private boolean isAnagram(int anagram, char word, int blankcount) {

    int mismatchcount = 0;

    int c;

    int second = {0,0,0,0,0, 0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0,  0,0,0,0,0, 0};

    int letter = 0;



    for (c = 0; c < word.length; c++) {

        letter = ++second[word[c] - 'A'];

        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);

        if (mismatchcount > blankcount)

            return false;

    }

    return true;

}

edited Dec 8 '18 at 15:50

answered Dec 7 '18 at 17:17

Dana Bell

246

edited Dec 8 '18 at 15:50

answered Dec 7 '18 at 17:17

Dana Bell

246

answered Dec 7 '18 at 17:17

Dana Bell

246

answered Dec 7 '18 at 17:17

Dana Bell

246

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

搜尋此網誌

Ytukyg