Skip to content

remove classic optimize#4391

Open
klirichek wants to merge 2 commits into
masterfrom
remove_classic_optimize
Open

remove classic optimize#4391
klirichek wants to merge 2 commits into
masterfrom
remove_classic_optimize

Conversation

@klirichek

Copy link
Copy Markdown
Contributor

Only progressive is in game from now

Type of Change (select one):

  • Bug fix
  • New feature
  • Documentation update

Description of the Change:

Related Issue (provide the link):

@github-actions

Copy link
Copy Markdown
Contributor

clt

❌ CLT tests in test/clt-tests/mcl/auto-embeddings-dml-test test/clt-tests/mcl/auto-embeddings-duplicate-id test/clt-tests/mcl/auto-embeddings-edge-cases test/clt-tests/mcl/auto-embeddings-endpoints test/clt-tests/mcl/auto-embeddings-error-handling test/clt-tests/mcl/auto-embeddings-from-vector-check test/clt-tests/mcl/auto-embeddings-hnsw-configs test/clt-tests/mcl/auto-embeddings-jina-remote test/clt-tests/mcl/auto-embeddings-json-api
✅ OK: 8
❌ Failed: 1
⏳ Duration: 575s
👉 Check Action Results for commit 53b1b9a

Failed tests:

🔧 Edit failed tests in UI:

test/clt-tests/mcl/auto-embeddings-jina-remote.rec
––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
    local file1="$1" file2="$2"

    awk '
    NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
    {
        dot += a[FNR]*$1
        sumb2 += $1*$1
    }
    END {
        print dot / (sqrt(suma2) * sqrt(sumb2))
    }' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/jina-embeddings-v2-base-en' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_jina_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/jina-embeddings-v4' FROM = 'title, content' API_KEY='${JINA_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SHOW CREATE TABLE test_jina_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_jina_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
- 0
+ ERROR 1064 (42000) at line 1: Failed to send request to remote model
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_jina_remote WHERE id=1"
––– output –––
+--------------+
| record_count |
+--------------+
- |            1 |
+ |            0 |
+--------------+
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_jina_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"

mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_remote WHERE id=1" | \
    grep -v embedding | \
    sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
    grep -E '^[0-9]+(\.[0-9]+)?$' | \
    awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt

mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_remote WHERE id=2" | \
    grep -v embedding | \
    sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
    grep -E '^[0-9]+(\.[0-9]+)?$' | \
    awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt

SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)

echo "Cosine similarity: $SIMILARITY"

RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
    if (sim > 0.99)
        print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
    else
        print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')

echo "$RESULT"

rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
- Cosine similarity: #!/(1|0\.[0-9]+)/!#
+ ERROR 1064 (42000) at line 1: Failed to send request to remote model
- SUCCESS: Same FROM fields produce similar vectors (similarity: #!/(1|0\.[0-9]+)/!#)
+ Cosine similarity: -nan
+ FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: -nan)
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_jina_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/jina-embeddings-v4' FROM = 'title' API_KEY='${JINA_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_jina_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
- multi_field_md5: #!/[0-9a-f]{32}/!#
+ ERROR 1064 (42000) at line 1: Failed to send request to remote model
- single_field_md5: #!/[0-9a-f]{32}/!#
+ multi_field_md5: d41d8cd98f00b204e9800998ecf8427e
- SUCCESS: Different FROM specifications produce different vectors
+ single_field_md5: d41d8cd98f00b204e9800998ecf8427e
+ INFO: FROM field comparison result
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_jina_invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_jina_no_from'" | grep -q test_jina_no_from; then mysql -h0 -P9306 -e "INSERT INTO test_jina_no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_jina_no_from'" | grep -q test_jina_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_jina_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$JINA_API_KEY" ] && [ "$JINA_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant