Skip to content

ref(dsyms): Prefer debug ids over code ids #95861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 28 additions & 19 deletions src/sentry/api/endpoints/debug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import jsonschema
import orjson
from django.db import IntegrityError, router
from django.db.models import Q
from django.db.models import Exists, Q
from django.http import Http404, HttpResponse, StreamingHttpResponse
from rest_framework import status
from rest_framework.request import Request
Expand Down Expand Up @@ -265,20 +265,37 @@ def get(self, request: Request, project: Project) -> Response:
except SymbolicError:
pass

q = Q(project_id=project.id)
if file_formats:
file_format_q = Q()
for file_format in file_formats:
known_file_format = DIF_MIMETYPES.get(file_format)
if known_file_format:
file_format_q |= Q(file__headers__icontains=known_file_format)
q &= file_format_q

queryset = None
if debug_id and code_id:
# Be lenient when searching for debug files, check either for a matching debug id
# or a matching code id, as both of these are unique identifiers for an object.
# or a matching code id. We only fallback to code id if there is no debug id match.
# While both identifiers should be unique, in practice they are not and the debug id
# yields better results.
#
# Ideally debug- and code-id yield the same files, but especially on Windows it is possible
# that the debug id does not perfectly match due to 'age' differences, but the code-id
# will match.
q = Q(debug_id__exact=debug_id) | Q(code_id__exact=code_id)
debug_id_qs = ProjectDebugFile.objects.filter(Q(debug_id__exact=debug_id) & q)
queryset = debug_id_qs.select_related("file").union(
ProjectDebugFile.objects.filter(Q(code_id__exact=code_id) & q)
# Only return any code id matches if there are *no* debug id matches.
.filter(~Exists(debug_id_qs)).select_related("file")
)
elif debug_id:
q = Q(debug_id__exact=debug_id)
q &= Q(debug_id__exact=debug_id)
elif code_id:
q = Q(code_id__exact=code_id)
q &= Q(code_id__exact=code_id)
elif query:
q = (
query_q = (
Q(object_name__icontains=query)
| Q(debug_id__icontains=query)
| Q(code_id__icontains=query)
Expand All @@ -288,25 +305,17 @@ def get(self, request: Request, project: Project) -> Response:

known_file_format = DIF_MIMETYPES.get(query)
if known_file_format:
q |= Q(file__headers__icontains=known_file_format)
else:
q = Q()
query_q |= Q(file__headers__icontains=known_file_format)

if file_formats:
file_format_q = Q()
for file_format in file_formats:
known_file_format = DIF_MIMETYPES.get(file_format)
if known_file_format:
file_format_q |= Q(file__headers__icontains=known_file_format)
q &= file_format_q
q &= query_q

q &= Q(project_id=project.id)
queryset = ProjectDebugFile.objects.filter(q).select_related("file")
if queryset is None:
queryset = ProjectDebugFile.objects.filter(q).select_related("file")

def on_results(difs: Sequence[ProjectDebugFile]):
# NOTE: we are only refreshing files if there is direct query for specific files
if debug_id and not query and not file_formats:
maybe_renew_debug_files(q, difs)
maybe_renew_debug_files(difs)

return serialize(difs, request.user)

Expand Down
6 changes: 3 additions & 3 deletions src/sentry/debug_files/debug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from datetime import timedelta

from django.db import router
from django.db.models import Q
from django.utils import timezone

from sentry.models.debugfile import ProjectDebugFile
Expand All @@ -16,7 +15,7 @@
AVAILABLE_FOR_RENEWAL_DAYS = 30


def maybe_renew_debug_files(query: Q, debug_files: Sequence[ProjectDebugFile]):
def maybe_renew_debug_files(debug_files: Sequence[ProjectDebugFile]):
# We take a snapshot in time that MUST be consistent across all updates.
now = timezone.now()
# We compute the threshold used to determine whether we want to renew the specific bundle.
Expand All @@ -27,10 +26,11 @@ def maybe_renew_debug_files(query: Q, debug_files: Sequence[ProjectDebugFile]):
if not needs_bump:
return

ids = [dif.id for dif in debug_files]
with metrics.timer("debug_files_renewal"):
with atomic_transaction(using=(router.db_for_write(ProjectDebugFile),)):
updated_rows_count = ProjectDebugFile.objects.filter(
query, date_accessed__lte=threshold_date
id__in=ids, date_accessed__lte=threshold_date
).update(date_accessed=now)
if updated_rows_count > 0:
metrics.incr("debug_files_renewal.were_renewed", updated_rows_count)
2 changes: 1 addition & 1 deletion src/sentry/models/debugfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def find_by_debug_ids(
# because otherwise this would be a circular import:
from sentry.debug_files.debug_files import maybe_renew_debug_files

maybe_renew_debug_files(query, difs)
maybe_renew_debug_files(difs)

difs_by_id: dict[str, list[ProjectDebugFile]] = {}
for dif in difs:
Expand Down
36 changes: 36 additions & 0 deletions tests/sentry/api/endpoints/test_debug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,42 @@ def test_dsyms_search(self):
dsyms = response.data
assert len(dsyms) == 20

def test_dsyms_debugid_codeid_full_match(self):
self._do_test_dsyms_by_debugid_and_codeid(
("dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"),
[(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256")],
)

def test_dsyms_debugid_codeid_full_match_and_partials(self):
self._do_test_dsyms_by_debugid_and_codeid(
("dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"),
[
(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"),
(False, "00000000-000000000-0000-000000000000", "ae0459704fc7256"),
(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "000000000000000"),
],
)

def test_dsyms_debugid_codeid_only_codeid(self):
self._do_test_dsyms_by_debugid_and_codeid(
("22222222-000000000-0000-000000000000", "ae0459704fc7256"),
[
(True, "10000000-000000000-0000-000000000000", "ae0459704fc7256"),
(True, "00000000-000000000-0000-000000000000", "ae0459704fc7256"),
(False, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "000000000000000"),
],
)

def _do_test_dsyms_by_debugid_and_codeid(self, query, files):
for _, debug_id, code_id in files:
self.create_dif_file(debug_id=debug_id, code_id=code_id)

response = self.client.get(f"{self.url}?debug_id={query[0]}&code_id={query[1]}")
assert response.status_code == 200, response.content

actual = sorted((dsym["debugId"], dsym["codeId"]) for dsym in response.data)
assert actual == sorted((debug_id, code_id) for (exp, debug_id, code_id) in files if exp)

def test_access_control(self):
# create a debug files such as proguard:
response = self._upload_proguard(self.url, PROGUARD_UUID)
Expand Down
Loading