Skip to content

Commit

Permalink
Added code references to search results.
Browse files Browse the repository at this point in the history
  • Loading branch information
sarahboyce committed Feb 14, 2025
1 parent 0070473 commit 511a787
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 1 deletion.
14 changes: 14 additions & 0 deletions docs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ def sync_to_db(self, decoded_documents):
document_path = _clean_document_path(document["current_page_name"])
document["slug"] = Path(document_path).parts[-1]
document["parents"] = " ".join(Path(document_path).parts[:-1])
code_references = utils.generate_code_references(document["body"])
document["code_references"] = code_references
document["code_references_search"] = " ".join(code_references.keys())
Document.objects.create(
release=self,
path=document_path,
Expand Down Expand Up @@ -278,7 +281,18 @@ def search(self, query_text, release):
stop_sel=STOP_SEL,
config=models.F("config"),
),
code_matched=SearchHeadline(
KeyTextTransform("code_references_search", "metadata"),
SearchQuery(
query_text, config="simple", search_type="websearch"
),
start_sel=START_SEL,
stop_sel=STOP_SEL,
config="simple",
highlight_all=True,
),
breadcrumbs=models.F("metadata__breadcrumbs"),
code_references=models.F("metadata__code_references"),
)
.only(
"path",
Expand Down
12 changes: 12 additions & 0 deletions docs/templates/docs/search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ <h2 class="result-title">
{% if result.highlight %}
…&nbsp;{{ result.highlight|cut:"¶"|safe }}&nbsp;…
{% endif %}
{% code_links result.code_matched result.code_references as result_code_links %}
{% if result_code_links %}
<div style="margin-top: 15px; margin-left: 10px;">
{% for name, id_link in result_code_links.items %}
<p>
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=result.path host 'docs' %}#{{ id_link }}">
<i class="icon icon-code"></i> <b>{{ name }}</b> ({{ id_link }})
</a>
</p>
{% endfor %}
</div>
{% endif %}
</dd>
{% endfor %}
</dl>
Expand Down
16 changes: 16 additions & 0 deletions docs/templatetags/docs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
from collections import OrderedDict
from urllib.parse import quote

from django import template
Expand Down Expand Up @@ -121,3 +122,18 @@ def generate_scroll_to_text_fragment(highlighted_text):
# Due to Python code such as timezone.now(), remove the space after a bracket.
single_spaced = re.sub(r"([(\[])\s", r"\1", single_spaced)
return f"#:~:text={quote(single_spaced)}"


@register.simple_tag(name="code_links")
def code_links(code_matched, code_references):
    """
    Return the code references that were highlighted in a search result.

    ``code_matched`` is a space-separated string in which matching terms are
    wrapped in ``START_SEL``/``STOP_SEL``; ``code_references`` maps a code
    name to the HTML id it links to.

    Return an ``OrderedDict`` of the highlighted names to their ids, sorted
    by name. An empty mapping is returned when either argument is ``None`` or
    empty (presumably the case for documents whose metadata carries no code
    reference data -- confirm against the search queryset).
    """
    if not code_matched or not code_references:
        return OrderedDict()
    # Strip the highlight markers to recover the bare names. A set keeps the
    # membership test below cheap regardless of how many words matched.
    highlighted_names = {
        word.replace(START_SEL, "").replace(STOP_SEL, "")
        for word in code_matched.split(" ")
        if START_SEL in word
    }
    return OrderedDict(
        sorted(
            (name, id_link)
            for name, id_link in code_references.items()
            if name in highlighted_names
        )
    )
53 changes: 52 additions & 1 deletion docs/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from .models import DOCUMENT_SEARCH_VECTOR, Document, DocumentRelease
from .sitemaps import DocsSitemap
from .templatetags.docs import generate_scroll_to_text_fragment, get_all_doc_versions
from .utils import get_doc_path, sanitize_for_trigram
from .utils import generate_code_references, get_doc_path, sanitize_for_trigram


class ModelsTests(TestCase):
Expand Down Expand Up @@ -360,6 +360,25 @@ def test_sanitize_for_trigram(self):
with self.subTest(query=query):
self.assertEqual(sanitize_for_trigram(query), sanitized_query)

def test_generate_code_references(self):
    """generate_code_references() maps short code names to their HTML ids."""
    test_cases = [
        ("", {}),
        (
            '<dl class="py attribute"><dt class="sig sig-object py" id="django.db.migrations.Migration.initial">',
            {"Migration.initial": "django.db.migrations.Migration.initial"},
        ),
        (
            '<dl class="py class"><dt class="sig sig-object py" id="django.contrib.gis.gdal.Layer">',
            {"Layer": "django.contrib.gis.gdal.Layer"},
        ),
        (
            '<dl class="py method"><dt class="sig sig-object py" id="db_for_write">',
            {"db_for_write": "db_for_write"},
        ),
    ]
    for body, expected in test_cases:
        # Run each case as its own subtest so one failure does not hide the
        # results of the remaining cases.
        with self.subTest(body=body):
            self.assertEqual(generate_code_references(body), expected)


class UpdateDocTests(TestCase):
@classmethod
Expand Down Expand Up @@ -424,6 +443,38 @@ def test_title_entities(self):
transform=attrgetter("title"),
)

def test_code_entities(self):
    """Syncing a document stores its code references in the metadata."""
    html_body = (
        '<dl class="py class"><dt class="sig sig-object py" id="django.contrib.gis.gdal.Layer">'
        '<dl class="py attribute"><dt class="sig sig-object py" id="django.db.migrations.Migration.initial">'
        '<dl class="py method"><dt class="sig sig-object py" id="db_for_write">'
    )
    decoded_document = {
        "body": html_body,
        "title": "This is the title",
        "current_page_name": "foo/bar",
    }
    expected_references = {
        "Layer": "django.contrib.gis.gdal.Layer",
        "Migration.initial": "django.db.migrations.Migration.initial",
        "db_for_write": "db_for_write",
    }
    expected_search_text = "Layer Migration.initial db_for_write"

    def code_metadata(doc):
        # Pair the stored mapping with its flattened search string.
        metadata = doc.metadata
        return (
            metadata["code_references"],
            metadata["code_references_search"],
        )

    self.release.sync_to_db([decoded_document])
    self.assertQuerySetEqual(
        self.release.documents.all(),
        [(expected_references, expected_search_text)],
        transform=code_metadata,
    )

def test_empty_documents(self):
self.release.sync_to_db(
[
Expand Down
38 changes: 38 additions & 0 deletions docs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,41 @@ def sanitize_for_trigram(text):
text = unicodedata.normalize("NFKD", text)
text = re.sub(r"[^\w\s]", "", text, flags=re.UNICODE)
return " ".join(text.split())


def generate_code_references(body):
    """
    Return a mapping of potential code search terms to HTML ids in ``body``.

    Django documents classes with the syntax `.. class::`.
    This results in the following HTML:
    <dl class="py class">
        <dt class="sig sig-object py" id="django.db.models.ManyToManyField">
        ...
        </dt>
    </dl>
    This is similar for attributes (`.. attribute::`), methods etc.

    The key is built from the last two dotted segments of the id when the
    first of those starts with an uppercase letter (e.g. "Index.fields"),
    otherwise from the last segment alone (e.g. "Index"). If several ids
    produce the same key, the last one found wins.
    """
    # Collect all <dt> HTML tag ids, e.g:
    # [
    #     'django.db.models.Index',
    #     'django.db.models.Index.expressions',
    #     'django.db.models.Index.fields',
    #     ...
    # ]
    # As the search term can be "expressions", "Index.expressions" etc. create
    # a mapping between potential code search terms and their HTML id.
    # {
    #     'Index': 'django.db.models.Index',
    #     'Index.expressions': 'django.db.models.Index.expressions',
    #     'Index.fields': 'django.db.models.Index.fields',
    #     ...
    # }
    code_paths = {}
    for reference in re.findall(r'<dt[^>]+id="([^"]+)"', body):
        code_path = reference.split(".")[-2:]
        # Slice rather than index the first character: an id such as
        # ".initial" or "a..b" yields an empty segment here, and "".isupper()
        # is simply False instead of raising IndexError.
        if code_path[0][:1].isupper():
            name = ".".join(code_path)
        else:
            name = code_path[-1]
        code_paths[name] = reference
    return code_paths

0 comments on commit 511a787

Please sign in to comment.