remove classic optimize#4391
Open
klirichek wants to merge 2 commits into
Open
Conversation
Only progressive is in game from now
Contributor
clt❌ CLT tests in Failed tests:🔧 Edit failed tests in UI: test/clt-tests/mcl/auto-embeddings-jina-remote.rec––– input –––
rm -f /var/log/manticore/searchd.log; stdbuf -oL searchd --stopwait > /dev/null; stdbuf -oL searchd ${SEARCHD_ARGS:-} > /dev/null
––– output –––
OK
––– input –––
if timeout 10 grep -qm1 'accepting connections' <(tail -n 1000 -f /var/log/manticore/searchd.log); then echo 'Accepting connections!'; else echo 'Timeout or failed!'; fi
––– output –––
OK
––– input –––
cosine_similarity() {
local file1="$1" file2="$2"
awk '
NR==FNR { a[NR]=$1; suma2+=$1*$1; next }
{
dot += a[FNR]*$1
sumb2 += $1*$1
}
END {
print dot / (sqrt(suma2) * sqrt(sumb2))
}' "$file1" "$file2"
}
––– output –––
OK
––– input –––
export -f cosine_similarity
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_invalid_model (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/invalid-model-name-12345' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_valid_model_no_api_key (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/jina-embeddings-v2-base-en' FROM = 'title') " 2>&1
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_jina_remote (title TEXT, content TEXT, description TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/jina-embeddings-v4' FROM = 'title, content' API_KEY='${JINA_API_KEY}') "; echo $?
––– output –––
OK
––– input –––
mysql -h0 -P9306 -E -e "SHOW CREATE TABLE test_jina_remote"
––– output –––
OK
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_jina_remote (id, title, content, description) VALUES(1, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'advanced AI research')"; echo $?
––– output –––
- 0
+ ERROR 1064 (42000) at line 1: Failed to send request to remote model
+ 1
––– input –––
mysql -h0 -P9306 -e "SELECT COUNT(*) as record_count FROM test_jina_remote WHERE id=1"
––– output –––
+--------------+
| record_count |
+--------------+
- | 1 |
+ | 0 |
+--------------+
––– input –––
mysql -h0 -P9306 -e "INSERT INTO test_jina_remote (id, title, content, description) VALUES(2, 'machine learning algorithms', 'deep neural networks and artificial intelligence', 'different description')"
mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_remote WHERE id=1" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector1.txt
mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_remote WHERE id=2" | \
grep -v embedding | \
sed 's/[0-9]\+\(\.[0-9]\+\)\?/\n&\n/g' | \
grep -E '^[0-9]+(\.[0-9]+)?$' | \
awk '{printf "%.5f\n", $1}' > /tmp/vector2.txt
SIMILARITY=$(cosine_similarity /tmp/vector1.txt /tmp/vector2.txt)
echo "Cosine similarity: $SIMILARITY"
RESULT=$(awk -v sim="$SIMILARITY" 'BEGIN {
if (sim > 0.99)
print "SUCCESS: Same FROM fields produce similar vectors (similarity: " sim ")"
else
print "FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: " sim ")"
}')
echo "$RESULT"
rm -f /tmp/vector1.txt /tmp/vector2.txt
––– output –––
- Cosine similarity: #!/(1|0\.[0-9]+)/!#
+ ERROR 1064 (42000) at line 1: Failed to send request to remote model
- SUCCESS: Same FROM fields produce similar vectors (similarity: #!/(1|0\.[0-9]+)/!#)
+ Cosine similarity: -nan
+ FAIL: Different vectors (FROM does not include description field and should not change generated vector value) (similarity: -nan)
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_jina_title_only (title TEXT, content TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/jina-embeddings-v4' FROM = 'title' API_KEY='${JINA_API_KEY}') "; mysql -h0 -P9306 -e "INSERT INTO test_jina_title_only (id, title, content) VALUES(1, 'machine learning algorithms', 'completely different content here')"; MD5_MULTI=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_remote WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); MD5_SINGLE=$(mysql -h0 -P9306 -e "SELECT embedding FROM test_jina_title_only WHERE id=1" | grep -v embedding | md5sum | awk '{print $1}'); echo "multi_field_md5: $MD5_MULTI"; echo "single_field_md5: $MD5_SINGLE"; if [ "$MD5_MULTI" != "$MD5_SINGLE" ]; then echo "SUCCESS: Different FROM specifications produce different vectors"; else echo "INFO: FROM field comparison result"; fi
––– output –––
- multi_field_md5: #!/[0-9a-f]{32}/!#
+ ERROR 1064 (42000) at line 1: Failed to send request to remote model
- single_field_md5: #!/[0-9a-f]{32}/!#
+ multi_field_md5: d41d8cd98f00b204e9800998ecf8427e
- SUCCESS: Different FROM specifications produce different vectors
+ single_field_md5: d41d8cd98f00b204e9800998ecf8427e
+ INFO: FROM field comparison result
––– input –––
mysql -h0 -P9306 -e "CREATE TABLE test_jina_invalid_field (title TEXT, embedding FLOAT_VECTOR KNN_TYPE='hnsw' HNSW_SIMILARITY='l2' MODEL_NAME = 'jina/text-embedding-ada-002' FROM = 'nonexistent_field') " 2>&1
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_jina_no_from'" | grep -q test_jina_no_from; then mysql -h0 -P9306 -e "INSERT INTO test_jina_no_from (id, title, embedding) VALUES(1, 'test title', '(0.1, 0.2, 0.3, 0.4, 0.5)')"; echo "insert_result: $?"; else echo "insert_result: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if mysql -h0 -P9306 -e "SHOW TABLES LIKE 'test_jina_no_from'" | grep -q test_jina_no_from; then mysql -h0 -P9306 -e "SHOW CREATE TABLE test_jina_no_from"; else echo "table_structure: skipped (table not created)"; fi
––– output –––
OK
––– input –––
if [ -n "$JINA_API_KEY" ] && [ "$JINA_API_KEY" != "dummy_key_for_testing" ]; then echo "API key is available for testing"; else echo "API key not available - using dummy for error testing"; fi
––– output –––
OK |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Only progressive is in game from now
Type of Change (select one):
Description of the Change:
Related Issue (provide the link):