langchain-ai · lkuligin · Feb 23, 2025 · Feb 19, 2025 · Feb 19, 2025 · Feb 19, 2025
diff --git a/libs/community/langchain_google_community/bq_storage_vectorstores/_base.py b/libs/community/langchain_google_community/bq_storage_vectorstores/_base.py
@@ -364,7 +364,7 @@ def similarity_search_by_vectors(
         **kwargs: Any,
     ) -> Any:
         """Core similarity search function. Handles a list of embedding vectors,
-            optionally returning scores and embeddings.
+        optionally returning scores and embeddings.
 
         Args:
             embeddings: A list of embedding vectors, where each vector is a list of
@@ -468,7 +468,7 @@ def similarity_search_with_score(
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """Search for top `k` docs most similar to input query, returns both docs and
-            scores.
+        scores.
 
         Args:
             query: search query to search documents with.

diff --git a/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py b/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py
@@ -1,7 +1,7 @@
 import uuid
 from datetime import datetime, timedelta
 from threading import Lock, Thread
-from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Type, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Type, Union
 
 from google.api_core.exceptions import ClientError
 from langchain_core.documents import Document
@@ -77,11 +77,14 @@ def get_documents(
 
         Args:
             ids: List of ids of documents to retrieve from the vectorstore.
-            filter: Filter on metadata properties, e.g.
-                            {
-                                "str_property": "foo",
-                                "int_property": 123
-                            }
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
         Returns:
             List of ids from adding the texts into the vectorstore.
         """
@@ -201,18 +204,11 @@ def _similarity_search_by_vectors_with_scores_and_embeddings(
                 query embedding.
             filter: (Optional) A dictionary or a string specifying filter criteria.
                 - If a dictionary is provided, it should map column names to their
-                corresponding
-                values. The method will generate SQL expressions based on the data
-                types defined
-                in `self.table_schema`:
-                    - For columns of type "INTEGER" or "FLOAT", the value is used
-                    directly.
-                    - For other data types, the value is enclosed in single quotes.
-                Example:
-                        {
-                            "str_property": "foo",
-                            "int_property": 123
-                        }
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
                 - If a string is provided, it is assumed to be a valid SQL WHERE clause.
             k: The number of top results to return for each query.
             batch_size: The size of batches to process embeddings.
@@ -255,18 +251,12 @@ def _create_filters(
 
         Args:
             filter: (Optional) A dictionary or a string specifying filter criteria.
-                - If a dictionary is provided, it should map column names to
-                their corresponding
-                values. The method will generate SQL expressions based on the
-                data types defined in `self.table_schema`:
-                    - For columns of type "INTEGER" or "FLOAT", the value is
-                    used directly.
-                    - For other data types, the value is enclosed in single quotes.
-                Example:
-                    {
-                        "str_property": "foo",
-                        "int_property": 123
-                    }
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
                 - If a string is provided, it is assumed to be a valid SQL WHERE clause.
 
         Returns:
@@ -462,19 +452,25 @@ def batch_search(
         expire_hours_temp_table: int = 12,
     ) -> List[List[List[Any]]]:
         """Multi-purpose batch search function. Accepts either embeddings or queries
-            but not both. Optionally returns similarity scores and/or matched embeddings
+        but not both. Optionally returns similarity scores and/or matched embeddings
+
         Args:
-        embeddings: A list of embeddings to search with. If provided, each
-            embedding represents a query vector.
-        queries: A list of text queries to search with.  If provided, each
-            query represents a query text.
-        filter: A dictionary of filters to apply to the search. The keys
-            of the dictionary should be field names, and the values should be the
-                values to filter on. (e.g., {"category": "news"})
-        k: The number of top results to return per query. Defaults to 5.
-        with_scores: If True, returns the relevance scores of the results along with
-            the documents
-        with_embeddings: If True, returns the embeddings of the results along with
+            embeddings: A list of embeddings to search with. If provided, each
+                embedding represents a query vector.
+            queries: A list of text queries to search with.  If provided, each
+                query represents a query text.
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
+            k: The number of top results to return per query. Defaults to 5.
+            with_scores: If True, returns the relevance scores of the results along with
+                the documents
+            with_embeddings: If True, returns the embeddings of the results along with
             the documents
         """
         from google.cloud import bigquery  # type: ignore[attr-defined]
@@ -525,6 +521,165 @@ def batch_search(
             with_embeddings=False,
         )
 
+    def similarity_search_by_vectors(
+        self,
+        embeddings: List[List[float]],
+        filter: Optional[Union[Dict[str, Any], str]] = None,
+        k: int = 5,
+        with_scores: bool = False,
+        with_embeddings: bool = False,
+        **kwargs: Any,
+    ) -> Any:
+        """Core similarity search function. Handles a list of embedding vectors,
+        optionally returning scores and embeddings.
+
+        Args:
+            embeddings: A list of embedding vectors, where each vector is a list of
+                floats.
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
+            k: (Optional) The number of top-ranking similar documents to return per
+                embedding. Defaults to 5.
+            with_scores: (Optional) If True, include similarity scores in the result
+                for each matched document. Defaults to False.
+            with_embeddings: (Optional) If True, include the matched document's
+                embedding vector in the result. Defaults to False.
+        Returns:
+            A list of `k` documents for each embedding in `embeddings`
+        """
+        results = self._similarity_search_by_vectors_with_scores_and_embeddings(
+            embeddings=embeddings, k=k, filter=filter, **kwargs
+        )
+
+        # Process results based on options
+        for i, query_results in enumerate(results):
+            if not with_scores and not with_embeddings:
+                # return only docs
+                results[i] = [x[0] for x in query_results]
+            elif not with_embeddings:
+                # return only docs and score
+                results[i] = [[x[0], x[1]] for x in query_results]
+            elif not with_scores:
+                # return only docs and embeddings
+                results[i] = [[x[0], x[2]] for x in query_results]
+
+        return results  # type: ignore[return-value]
+
+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 5,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
+            k: (Optional) The number of top-ranking similar documents to return per
+                embedding. Defaults to 5.
+        Returns:
+            Return docs most similar to embedding vector.
+        """
+        return self.similarity_search_by_vectors(embeddings=[embedding], k=k, **kwargs)[
+            0
+        ]
+
+    def similarity_search_by_vector_with_score(
+        self,
+        embedding: List[float],
+        filter: Optional[Union[Dict[str, Any], str]] = None,
+        k: int = 5,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to embedding vector with scores.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
+            k: (Optional) The number of top-ranking similar documents to return per
+                embedding. Defaults to 5.
+        Returns:
+            Return docs most similar to embedding vector.
+        """
+        return self.similarity_search_by_vectors(
+            embeddings=[embedding], filter=filter, k=k, with_scores=True
+        )[0]
+
+    def similarity_search(
+        self, query: str, k: int = 5, **kwargs: Any
+    ) -> List[Document]:
+        """Search for top `k` docs most similar to input query.
+
+        Args:
+            query: search query to search documents with.
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
+            k: (Optional) The number of top-ranking similar documents to return per
+                embedding. Defaults to 5.
+        Returns:
+            Return docs most similar to input query.
+        """
+        embedding = self.embedding.embed_query(query)
+        return self.similarity_search_by_vectors(embeddings=[embedding], k=k, **kwargs)[
+            0
+        ]
+
+    def similarity_search_with_score(
+        self,
+        query: str,
+        filter: Optional[Union[Dict[str, Any], str]] = None,
+        k: int = 5,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Search for top `k` docs most similar to input query, returns both docs and
+        scores.
+
+        Args:
+            query: search query to search documents with.
+            filter: (Optional) A dictionary or a string specifying filter criteria.
+                - If a dictionary is provided, it should map column names to their
+                corresponding values. The method will generate SQL expressions based
+                on the data types defined in `self.table_schema`. The value is enclosed
+                in single quotes unless the column is of type "INTEGER" or "FLOAT", in
+                which case the value is used directly. E.g., `{"str_property": "foo",
+                "int_property": 123}`.
+                - If a string is provided, it is assumed to be a valid SQL WHERE clause.
+            k: (Optional) The number of top-ranking similar documents to return per
+                embedding. Defaults to 5.
+        Returns:
+            Return docs most similar to input query along with scores.
+        """
+        embedding = self.embedding.embed_query(query)
+        return self.similarity_search_by_vector_with_score(
+            embedding=embedding, filter=filter, k=k
+        )
+
     @classmethod
     def from_texts(
         cls: Type["BigQueryVectorStore"],
@@ -549,12 +704,11 @@ def from_texts(
         return vs_obj
 
     def to_vertex_fs_vector_store(self, **kwargs: Any) -> Any:
-        """
-        Creates and returns a VertexFSVectorStore instance based on configuration.
+        """Creates and returns a VertexFSVectorStore instance based on configuration.
 
         This method merges the base BigQuery vector store configuration with provided
-            keyword arguments,
-        then uses the combined parameters to instantiate a VertexFSVectorStore.
+        keyword arguments, then uses the combined parameters to instantiate a 
+        VertexFSVectorStore.
 
         Args:
             **kwargs: Additional keyword arguments to override or extend the base