Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/sentry_sdk/serializer.py: 9%
216 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1import sys
2import math
4from datetime import datetime
6from sentry_sdk.utils import (
7 AnnotatedValue,
8 capture_internal_exception,
9 disable_capture_event,
10 format_timestamp,
11 json_dumps,
12 safe_repr,
13 strip_string,
14)
16import sentry_sdk.utils
18from sentry_sdk._compat import text_type, PY2, string_types, number_types, iteritems
20from sentry_sdk._types import MYPY
22if MYPY: 22 ↛ 23line 22 didn't jump to line 23, because the condition on line 22 was never true
23 from datetime import timedelta
25 from types import TracebackType
27 from typing import Any
28 from typing import Callable
29 from typing import ContextManager
30 from typing import Dict
31 from typing import List
32 from typing import Optional
33 from typing import Tuple
34 from typing import Type
35 from typing import Union
37 from sentry_sdk._types import NotImplementedType, Event
39 Span = Dict[str, Any]
41 ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]]
42 Segment = Union[str, int]
45if PY2: 45 ↛ 48line 45 didn't jump to line 48, because the condition on line 45 was never true
46 # Importing ABCs from collections is deprecated, and will stop working in 3.8
47 # https://github.com/python/cpython/blob/master/Lib/collections/__init__.py#L49
48 from collections import Mapping, Sequence, Set
50 serializable_str_types = string_types
52else:
53 # New in 3.3
54 # https://docs.python.org/3/library/collections.abc.html
55 from collections.abc import Mapping, Sequence, Set
57 # Bytes are technically not strings in Python 3, but we can serialize them
58 serializable_str_types = (str, bytes)
# Maximum length of JSON-serialized event payloads that can be safely sent
# before the server may reject the event due to its size. This is not intended
# to reflect actual values defined server-side, but rather only be an upper
# bound for events sent by the SDK.
#
# Can be overwritten if wanting to send more bytes, e.g. with a custom server.
# When changing this, keep in mind that events may be a little bit larger than
# this value due to attached metadata, so keep the number conservative.
MAX_EVENT_BYTES = 10**6

# Default nesting depth applied to databag values when the caller of the
# serializer did not pass an explicit `remaining_depth`.
MAX_DATABAG_DEPTH = 5
# Default number of items kept per container in databag values when the caller
# did not pass an explicit `remaining_breadth`.
MAX_DATABAG_BREADTH = 10
# Placeholder emitted in place of an object that is already being serialized
# further up the current path.
CYCLE_MARKER = "<cyclic>"
# Callables consulted, in registration order, for databag values during
# serialization. Each one receives the object and a dict of hints.
global_repr_processors = []  # type: List[ReprProcessor]


def add_global_repr_processor(processor):
    # type: (ReprProcessor) -> None
    """Register `processor` at the end of `global_repr_processors`."""
    global_repr_processors.extend((processor,))
class Memo(object):
    """
    Tracks the objects that are currently being visited so that reference
    cycles can be detected.

    Usage::

        with memo.memoize(obj) as is_cycle:
            if is_cycle:
                ...  # obj is already being visited further up the stack

    The value bound by ``with`` is `True` if `obj` is already being visited by
    an enclosing ``with`` block, `False` otherwise.
    """

    __slots__ = ("_ids", "_objs", "_registered")

    def __init__(self):
        # type: () -> None
        # id(obj) -> obj for every object currently being visited.
        self._ids = {}  # type: Dict[int, Any]
        # Stack of objects passed to memoize() whose block has not exited yet.
        self._objs = []  # type: List[Any]
        # Parallel to the entered part of `_objs`: whether that entry is the
        # one that put the object into `_ids` (and therefore has to remove it).
        self._registered = []  # type: List[bool]

    def memoize(self, obj):
        # type: (Any) -> ContextManager[bool]
        """Push `obj` and return `self` for use as a context manager."""
        self._objs.append(obj)
        return self

    def __enter__(self):
        # type: () -> bool
        obj = self._objs[-1]
        if id(obj) in self._ids:
            # Already being visited by an enclosing block: report a cycle and
            # remember that this entry does not own the registration.
            self._registered.append(False)
            return True

        self._ids[id(obj)] = obj
        self._registered.append(True)
        return False

    def __exit__(
        self,
        ty,  # type: Optional[Type[BaseException]]
        value,  # type: Optional[BaseException]
        tb,  # type: Optional[TracebackType]
    ):
        # type: (...) -> None
        obj = self._objs.pop()
        # Only the entry that registered the object may unregister it.
        # Otherwise leaving a block that merely detected a cycle would drop
        # the enclosing block's registration, and a second reference to the
        # same ancestor would no longer be recognized as cyclic.
        if self._registered.pop():
            self._ids.pop(id(obj), None)
def serialize(event, smart_transaction_trimming=False, **kwargs):
    # type: (Event, bool, **Any) -> Event
    """
    Recursively convert `event` into plain, JSON-friendly values.

    Objects that are already being serialized further up the current path are
    replaced with `CYCLE_MARKER`. Values in "databag" positions (see
    `_is_databag`) are limited in depth and breadth. Metadata recorded via
    `_annotate` while walking the event is attached to the result under the
    `"_meta"` key when the result is a dict.

    :param event: The event to serialize.
    :param smart_transaction_trimming: If true, span descriptions are not
        truncated individually. They are only shortened afterwards, when their
        total length exceeds the usual per-string budget and the JSON-encoded
        event is larger than `MAX_EVENT_BYTES`.
    :param kwargs: Forwarded to `_serialize_node` for the top-level call.
    :returns: The serialized event.
    """
    memo = Memo()
    # Segments (dict keys / list indices) leading to the node being serialized.
    path = []  # type: List[Segment]
    # Stack of nested metadata dicts; index 0 is the root that ends up as
    # `_meta`, deeper entries follow `path`.
    meta_stack = []  # type: List[Dict[str, Any]]
    # Lengths of span descriptions that were left untruncated.
    span_description_bytes = []  # type: List[int]

    def _annotate(**meta):
        # type: (**Any) -> None
        """
        Record `meta` for the node at the current `path`, creating the
        intermediate metadata dicts along the path as needed.
        """
        while len(meta_stack) <= len(path):
            try:
                segment = path[len(meta_stack) - 1]
                node = meta_stack[-1].setdefault(text_type(segment), {})
            except IndexError:
                # meta_stack is still empty: start the root node.
                node = {}

            meta_stack.append(node)

        meta_stack[-1].setdefault("", {}).update(meta)

    def _should_repr_strings():
        # type: () -> Optional[bool]
        """
        By default non-serializable objects are going through
        safe_repr(). For certain places in the event (local vars) we
        want to repr() even things that are JSON-serializable to
        make their type more apparent. For example, it's useful to
        see the difference between a unicode-string and a bytestring
        when viewing a stacktrace.

        For container-types we still don't do anything different.
        Generally we just try to make the Sentry UI present exactly
        what a pretty-printed repr would look like.

        :returns: `True` if we are somewhere in frame variables, and `False` if
            we are in a position where we will never encounter frame variables
            when recursing (for example, we're in `event.extra`). `None` if we
            are not (yet) in frame variables, but might encounter them when
            recursing (e.g. we're in `event.exception`)
        """
        try:
            p0 = path[0]
            if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
                return True

            if (
                p0 in ("threads", "exception")
                and path[1] == "values"
                and path[3] == "stacktrace"
                and path[4] == "frames"
                and path[6] == "vars"
            ):
                return True
        except IndexError:
            # The path is too short to decide yet.
            return None

        return False

    def _is_databag():
        # type: () -> Optional[bool]
        """
        A databag is any value that we need to trim.

        :returns: Works like `_should_repr_strings()`. `True` for "yes",
            `False` for "no", `None` for "maybe soon".
        """
        try:
            rv = _should_repr_strings()
            if rv in (True, None):
                return rv

            p0 = path[0]
            if p0 == "request" and path[1] == "data":
                return True

            if p0 == "breadcrumbs" and path[1] == "values":
                # Evaluated only for its IndexError: the path must be at least
                # three segments long before this counts as a databag.
                path[2]
                return True

            if p0 == "extra":
                return True

        except IndexError:
            return None

        return False

    def _serialize_node(
        obj,  # type: Any
        is_databag=None,  # type: Optional[bool]
        should_repr_strings=None,  # type: Optional[bool]
        segment=None,  # type: Optional[Segment]
        remaining_breadth=None,  # type: Optional[int]
        remaining_depth=None,  # type: Optional[int]
    ):
        # type: (...) -> Any
        """
        Serialize `obj` as the child named `segment` of the current path.

        Maintains `path` / `meta_stack` around the call, short-circuits
        cycles, and converts any exception raised while serializing into a
        placeholder value instead of propagating it.
        """
        if segment is not None:
            path.append(segment)

        try:
            with memo.memoize(obj) as result:
                if result:
                    return CYCLE_MARKER

                return _serialize_node_impl(
                    obj,
                    is_databag=is_databag,
                    should_repr_strings=should_repr_strings,
                    remaining_depth=remaining_depth,
                    remaining_breadth=remaining_breadth,
                )
        except BaseException:
            capture_internal_exception(sys.exc_info())

            if is_databag:
                return "<failed to serialize, use init(debug=True) to see error logs>"

            return None
        finally:
            if segment is not None:
                path.pop()
                # Drop metadata nodes that belong to the path we just left.
                del meta_stack[len(path) + 1 :]

    def _flatten_annotated(obj):
        # type: (Any) -> Any
        """
        If `obj` is an `AnnotatedValue`, record its metadata for the current
        path and return the wrapped value; otherwise return `obj` unchanged.
        """
        if isinstance(obj, AnnotatedValue):
            _annotate(**obj.metadata)
            obj = obj.value
        return obj

    def _serialize_node_impl(
        obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """
        Serialize `obj` according to its type; containers recurse through
        `_serialize_node`.
        """
        # Undecided flags are resolved from the current path.
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()

        if is_databag is None:
            is_databag = _is_databag()

        # Databags get the default limits unless the caller supplied some.
        if is_databag and remaining_depth is None:
            remaining_depth = MAX_DATABAG_DEPTH
        if is_databag and remaining_breadth is None:
            remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        if remaining_depth is not None and remaining_depth <= 0:
            # Depth budget exhausted: annotate and stop recursing.
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            # The first processor that returns something other than
            # NotImplemented decides the result.
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        # Looked up on the type, not the instance.
        sentry_repr = getattr(type(obj), "__sentry_repr__", None)

        if obj is None or isinstance(obj, (bool, number_types)):
            # inf / nan floats are always repr()'d, other primitives only when
            # should_repr_strings is set.
            if should_repr_strings or (
                isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
            ):
                return safe_repr(obj)
            else:
                return obj

        elif callable(sentry_repr):
            return sentry_repr(obj)

        elif isinstance(obj, datetime):
            return (
                text_type(format_timestamp(obj))
                if not should_repr_strings
                else safe_repr(obj)
            )

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(iteritems(obj))

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in iteritems(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget exhausted: record the full length and
                    # drop the remaining items.
                    _annotate(len=len(obj))
                    break

                str_k = text_type(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(
            obj, (Set, Sequence)
        ):
            rv_list = []

            for i, v in enumerate(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget exhausted: record the full length and
                    # drop the remaining items.
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        remaining_depth=remaining_depth - 1
                        if remaining_depth is not None
                        else None,
                        remaining_breadth=remaining_breadth,
                    )
                )

            return rv_list

        # Everything else ends up as a string.
        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        # Allow span descriptions to be longer than other strings.
        #
        # For database auto-instrumented spans, the description contains
        # potentially long SQL queries that are most useful when not truncated.
        # Because arbitrarily large events may be discarded by the server as a
        # protection mechanism, we dynamically limit the description length
        # later in _truncate_span_descriptions.
        if (
            smart_transaction_trimming
            and len(path) == 3
            and path[0] == "spans"
            and path[-1] == "description"
        ):
            span_description_bytes.append(len(obj))
            return obj
        return _flatten_annotated(strip_string(obj))

    def _truncate_span_descriptions(serialized_event, event, excess_bytes):
        # type: (Event, Event, int) -> None
        """
        Modifies serialized_event in-place trying to remove excess_bytes from
        span descriptions. The original event is used read-only to access the
        span timestamps (represented as RFC 3339-formatted strings in
        serialized_event).

        It uses heuristics to prioritize preserving the description of spans
        that might be the most interesting ones in terms of understanding and
        optimizing performance.
        """
        # When truncating a description, preserve a small prefix.
        min_length = 10

        def shortest_duration_longest_description_first(args):
            # type: (Tuple[int, Span]) -> Tuple[timedelta, int]
            # Sort key: spans with the shortest duration come first; among
            # equal durations, the longest description comes first.
            i, serialized_span = args
            span = event["spans"][i]
            # A missing start or end timestamp is replaced with the current time.
            now = datetime.utcnow()
            start = span.get("start_timestamp") or now
            end = span.get("timestamp") or now
            duration = end - start
            description = serialized_span.get("description") or ""
            return (duration, -len(description))

        # Note: for simplicity we sort spans by exact duration and description
        # length. If ever needed, we could have a more involved heuristic, e.g.
        # replacing exact durations with "buckets" and/or looking at other span
        # properties.
        path.append("spans")
        for i, span in sorted(
            enumerate(serialized_event.get("spans") or []),
            key=shortest_duration_longest_description_first,
        ):
            description = span.get("description") or ""
            if len(description) <= min_length:
                continue
            excess_bytes -= len(description) - min_length
            # Point `path` at this description so that the annotation made by
            # _flatten_annotated lands on the right node.
            path.extend([i, "description"])
            # Note: the last time we call strip_string we could preserve a few
            # more bytes up to a total length of MAX_EVENT_BYTES. Since that's
            # not strictly required, we leave it out for now for simplicity.
            span["description"] = _flatten_annotated(
                strip_string(description, max_length=min_length)
            )
            del path[-2:]
            del meta_stack[len(path) + 1 :]

            if excess_bytes <= 0:
                break
        path.pop()
        del meta_stack[len(path) + 1 :]

    # Set for the duration of serialization and reset in the `finally` below.
    disable_capture_event.set(True)
    try:
        rv = _serialize_node(event, **kwargs)
        if meta_stack and isinstance(rv, dict):
            rv["_meta"] = meta_stack[0]

        sum_span_description_bytes = sum(span_description_bytes)
        if smart_transaction_trimming and sum_span_description_bytes > 0:
            span_count = len(event.get("spans") or [])
            # This is an upper bound of how many bytes all descriptions would
            # consume if the usual string truncation in _serialize_node_impl
            # would have taken place, not accounting for the metadata attached
            # as event["_meta"].
            descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH

            # If by not truncating descriptions we ended up with more bytes than
            # per the usual string truncation, check if the event is too large
            # and we need to truncate some descriptions.
            #
            # This is guarded with an if statement to avoid JSON-encoding the
            # event unnecessarily.
            if sum_span_description_bytes > descriptions_budget_bytes:
                original_bytes = len(json_dumps(rv))
                excess_bytes = original_bytes - MAX_EVENT_BYTES
                if excess_bytes > 0:
                    # Event is too large, will likely be discarded by the
                    # server. Trim it down before sending.
                    _truncate_span_descriptions(rv, event, excess_bytes)

                    # Span descriptions truncated, set or reset _meta.
                    #
                    # We run the same code earlier because we want to account
                    # for _meta when calculating original_bytes, the number of
                    # bytes in the JSON-encoded event.
                    if meta_stack and isinstance(rv, dict):
                        rv["_meta"] = meta_stack[0]
        return rv
    finally:
        disable_capture_event.set(False)