Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/sentry_sdk/serializer.py: 9%

216 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1import sys 

2import math 

3 

4from datetime import datetime 

5 

6from sentry_sdk.utils import ( 

7 AnnotatedValue, 

8 capture_internal_exception, 

9 disable_capture_event, 

10 format_timestamp, 

11 json_dumps, 

12 safe_repr, 

13 strip_string, 

14) 

15 

16import sentry_sdk.utils 

17 

18from sentry_sdk._compat import text_type, PY2, string_types, number_types, iteritems 

19 

20from sentry_sdk._types import MYPY 

21 

22if MYPY: 22 ↛ 23line 22 didn't jump to line 23, because the condition on line 22 was never true

23 from datetime import timedelta 

24 

25 from types import TracebackType 

26 

27 from typing import Any 

28 from typing import Callable 

29 from typing import ContextManager 

30 from typing import Dict 

31 from typing import List 

32 from typing import Optional 

33 from typing import Tuple 

34 from typing import Type 

35 from typing import Union 

36 

37 from sentry_sdk._types import NotImplementedType, Event 

38 

39 Span = Dict[str, Any] 

40 

41 ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]] 

42 Segment = Union[str, int] 

43 

44 

45if PY2: 45 ↛ 48line 45 didn't jump to line 48, because the condition on line 45 was never true

46 # Importing ABCs from collections is deprecated, and will stop working in 3.8 

47 # https://github.com/python/cpython/blob/master/Lib/collections/__init__.py#L49 

48 from collections import Mapping, Sequence, Set 

49 

50 serializable_str_types = string_types 

51 

52else: 

53 # New in 3.3 

54 # https://docs.python.org/3/library/collections.abc.html 

55 from collections.abc import Mapping, Sequence, Set 

56 

57 # Bytes are technically not strings in Python 3, but we can serialize them 

58 serializable_str_types = (str, bytes) 

59 

60 

# Maximum length of JSON-serialized event payloads that can be safely sent
# before the server may reject the event due to its size. This is not intended
# to reflect actual values defined server-side, but rather only be an upper
# bound for events sent by the SDK.
#
# Can be overwritten if wanting to send more bytes, e.g. with a custom server.
# When changing this, keep in mind that events may be a little bit larger than
# this value due to attached metadata, so keep the number conservative.
MAX_EVENT_BYTES = 10**6

# Default nesting depth budget applied to databag values when the caller of
# the serializer did not pass an explicit `remaining_depth`.
MAX_DATABAG_DEPTH = 5
# Default number of items kept per mapping/sequence inside a databag when the
# caller did not pass an explicit `remaining_breadth`.
MAX_DATABAG_BREADTH = 10
# Placeholder emitted in place of an object that is already being serialized
# further up the current path (i.e. a reference cycle).
CYCLE_MARKER = "<cyclic>"


# Callables consulted, in registration order, for every databag value; the
# first one that returns something other than `NotImplemented` wins.
global_repr_processors = []  # type: List[ReprProcessor]

77 

78 

def add_global_repr_processor(processor):
    # type: (ReprProcessor) -> None
    """
    Register `processor` at the end of the module-wide repr processor list.

    Processors are tried in registration order while serializing databag
    values; a processor signals "not handled" by returning `NotImplemented`.
    """
    global_repr_processors.append(processor)

82 

83 

class Memo(object):
    """
    Tracks the objects that are currently being visited so that reference
    cycles can be detected.

    Intended use::

        with memo.memoize(obj) as already_visiting:
            if already_visiting:
                ...  # obj is an ancestor of itself

    `memoize()` pushes the object and returns the memo itself, which acts as
    the context manager. Entering yields `True` when the object is already
    being visited by an enclosing frame and `False` otherwise.
    """

    __slots__ = ("_ids", "_objs")

    def __init__(self):
        # type: () -> None
        # id(obj) -> number of currently open `with` frames for that object.
        self._ids = {}  # type: Dict[int, int]
        # Stack of objects handed to memoize(); keeps each object referenced
        # for as long as its frame is open.
        self._objs = []  # type: List[Any]

    def memoize(self, obj):
        # type: (Any) -> ContextManager[bool]
        """Push `obj` as the next object to visit and return the memo."""
        self._objs.append(obj)
        return self

    def __enter__(self):
        # type: () -> bool
        """Return `True` if the most recently pushed object is already open."""
        key = id(self._objs[-1])
        open_frames = self._ids.get(key, 0)
        self._ids[key] = open_frames + 1
        return open_frames > 0

    def __exit__(
        self,
        ty,  # type: Optional[Type[BaseException]]
        value,  # type: Optional[BaseException]
        tb,  # type: Optional[TracebackType]
    ):
        # type: (...) -> None
        """Close the innermost frame without forgetting enclosing ones."""
        key = id(self._objs.pop())
        open_frames = self._ids.get(key, 0) - 1
        if open_frames > 0:
            # A frame that merely detected the cycle is closing; the ancestor
            # that registered the object is still visiting it, so the entry
            # must survive. Dropping it here would let a later sibling
            # reference to the same object slip past cycle detection.
            self._ids[key] = open_frames
        else:
            self._ids.pop(key, None)

114 

115 

def serialize(event, smart_transaction_trimming=False, **kwargs):
    # type: (Event, bool, **Any) -> Event
    """
    Walk `event` recursively and return a copy made of plain values.

    Values located in "databag" positions (see `_is_databag`) are limited in
    depth and breadth, strings are passed through `strip_string`, and any
    metadata produced along the way is attached to the result under the
    `"_meta"` key.

    :param event: The event to serialize.
    :param smart_transaction_trimming: When true, `spans[i].description`
        strings are initially kept at full length and are only shortened
        afterwards if the JSON-encoded result exceeds `MAX_EVENT_BYTES`.
    :param kwargs: Forwarded unchanged to the root `_serialize_node` call.
    :returns: The serialized event.
    """
    memo = Memo()
    # Segments (dict keys / list indices) from the root to the current node.
    path = []  # type: List[Segment]
    # Lazily built chain of metadata nodes; entry 0 is the root and entry i
    # is nested below entry i - 1 under the key of the matching path segment.
    meta_stack = []  # type: List[Dict[str, Any]]
    # Lengths of span descriptions that were returned without truncation.
    span_description_bytes = []  # type: List[int]

    def _annotate(**meta):
        # type: (**Any) -> None
        """Merge `meta` into the metadata node for the current `path`."""
        # Grow meta_stack until it holds one node more than path has segments.
        while len(meta_stack) <= len(path):
            try:
                segment = path[len(meta_stack) - 1]
                node = meta_stack[-1].setdefault(text_type(segment), {})
            except IndexError:
                # meta_stack is still empty, so there is no parent to nest
                # under yet: start with a fresh root node.
                node = {}

            meta_stack.append(node)

        # The metadata of a node itself is stored under the empty-string key.
        meta_stack[-1].setdefault("", {}).update(meta)

    def _should_repr_strings():
        # type: () -> Optional[bool]
        """
        By default non-serializable objects are going through
        safe_repr(). For certain places in the event (local vars) we
        want to repr() even things that are JSON-serializable to
        make their type more apparent. For example, it's useful to
        see the difference between a unicode-string and a bytestring
        when viewing a stacktrace.

        For container-types we still don't do anything different.
        Generally we just try to make the Sentry UI present exactly
        what a pretty-printed repr would look like.

        :returns: `True` if we are somewhere in frame variables, and `False` if
            we are in a position where we will never encounter frame variables
            when recursing (for example, we're in `event.extra`). `None` if we
            are not (yet) in frame variables, but might encounter them when
            recursing (e.g. we're in `event.exception`)
        """
        try:
            p0 = path[0]
            if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
                return True

            if (
                p0 in ("threads", "exception")
                and path[1] == "values"
                and path[3] == "stacktrace"
                and path[4] == "frames"
                and path[6] == "vars"
            ):
                return True
        except IndexError:
            # The path is too short to decide yet.
            return None

        return False

    def _is_databag():
        # type: () -> Optional[bool]
        """
        A databag is any value that we need to trim.

        :returns: Works like `_should_repr_strings()`. `True` for "yes",
            `False` for "no", `None` for "maybe soon".
        """
        try:
            rv = _should_repr_strings()
            if rv in (True, None):
                return rv

            p0 = path[0]
            if p0 == "request" and path[1] == "data":
                return True

            if p0 == "breadcrumbs" and path[1] == "values":
                # Evaluated only for its IndexError: the answer is "maybe
                # soon" until the path reaches into an individual breadcrumb.
                path[2]
                return True

            if p0 == "extra":
                return True

        except IndexError:
            return None

        return False

    def _serialize_node(
        obj,  # type: Any
        is_databag=None,  # type: Optional[bool]
        should_repr_strings=None,  # type: Optional[bool]
        segment=None,  # type: Optional[Segment]
        remaining_breadth=None,  # type: Optional[int]
        remaining_depth=None,  # type: Optional[int]
    ):
        # type: (...) -> Any
        """
        Serialize `obj`, which lives at `segment` below the current `path`.

        Handles the bookkeeping around `_serialize_node_impl`: maintaining
        `path` and `meta_stack`, detecting reference cycles and containing
        errors raised while serializing.
        """
        if segment is not None:
            path.append(segment)

        try:
            with memo.memoize(obj) as result:
                if result:
                    # obj is already being serialized further up the path.
                    return CYCLE_MARKER

                return _serialize_node_impl(
                    obj,
                    is_databag=is_databag,
                    should_repr_strings=should_repr_strings,
                    remaining_depth=remaining_depth,
                    remaining_breadth=remaining_breadth,
                )
        except BaseException:
            # Serialization is best effort: report the failure and substitute
            # a placeholder (databags) or None instead of propagating.
            capture_internal_exception(sys.exc_info())

            if is_databag:
                return "<failed to serialize, use init(debug=True) to see error logs>"

            return None
        finally:
            if segment is not None:
                path.pop()
                # Drop metadata nodes that belong to the subtree just left.
                del meta_stack[len(path) + 1 :]

    def _flatten_annotated(obj):
        # type: (Any) -> Any
        """Unwrap an `AnnotatedValue`, recording its metadata at `path`."""
        if isinstance(obj, AnnotatedValue):
            _annotate(**obj.metadata)
            obj = obj.value
        return obj

    def _serialize_node_impl(
        obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
        """
        Convert `obj` according to its type.

        `is_databag` and `should_repr_strings` are derived from the current
        `path` when passed as `None`; once decided they are handed down to
        children explicitly.
        """
        if should_repr_strings is None:
            should_repr_strings = _should_repr_strings()

        if is_databag is None:
            is_databag = _is_databag()

        # Limits only apply to databags and start at the module defaults.
        if is_databag and remaining_depth is None:
            remaining_depth = MAX_DATABAG_DEPTH
        if is_databag and remaining_breadth is None:
            remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        if remaining_depth is not None and remaining_depth <= 0:
            # Depth budget exhausted: annotate and stop descending.
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(strip_string(safe_repr(obj)))
            return None

        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        # Looked up on the type rather than on the instance.
        sentry_repr = getattr(type(obj), "__sentry_repr__", None)

        if obj is None or isinstance(obj, (bool, number_types)):
            # inf/nan floats are always repr'd instead of passed through.
            if should_repr_strings or (
                isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
            ):
                return safe_repr(obj)
            else:
                return obj

        elif callable(sentry_repr):
            return sentry_repr(obj)

        elif isinstance(obj, datetime):
            return (
                text_type(format_timestamp(obj))
                if not should_repr_strings
                else safe_repr(obj)
            )

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(iteritems(obj))

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in iteritems(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget exhausted: record the full size and stop.
                    _annotate(len=len(obj))
                    break

                str_k = text_type(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    remaining_depth=remaining_depth - 1
                    if remaining_depth is not None
                    else None,
                    remaining_breadth=remaining_breadth,
                )
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(
            obj, (Set, Sequence)
        ):
            rv_list = []

            for i, v in enumerate(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget exhausted: record the full size and stop.
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        remaining_depth=remaining_depth - 1
                        if remaining_depth is not None
                        else None,
                        remaining_breadth=remaining_breadth,
                    )
                )

            return rv_list

        # Everything that reaches this point is turned into a string.
        if should_repr_strings:
            obj = safe_repr(obj)
        else:
            if isinstance(obj, bytes):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, string_types):
                obj = safe_repr(obj)

        # Allow span descriptions to be longer than other strings.
        #
        # For database auto-instrumented spans, the description contains
        # potentially long SQL queries that are most useful when not truncated.
        # Because arbitrarily large events may be discarded by the server as a
        # protection mechanism, we dynamically limit the description length
        # later in _truncate_span_descriptions.
        if (
            smart_transaction_trimming
            and len(path) == 3
            and path[0] == "spans"
            and path[-1] == "description"
        ):
            span_description_bytes.append(len(obj))
            return obj
        return _flatten_annotated(strip_string(obj))

    def _truncate_span_descriptions(serialized_event, event, excess_bytes):
        # type: (Event, Event, int) -> None
        """
        Modifies serialized_event in-place trying to remove excess_bytes from
        span descriptions. The original event is used read-only to access the
        span timestamps (represented as RFC 3339-formatted strings in
        serialized_event).

        It uses heuristics to prioritize preserving the description of spans
        that might be the most interesting ones in terms of understanding and
        optimizing performance.
        """
        # When truncating a description, preserve a small prefix.
        min_length = 10

        def shortest_duration_longest_description_first(args):
            # type: (Tuple[int, Span]) -> Tuple[timedelta, int]
            """Sort key: ascending duration, then descending description length."""
            i, serialized_span = args
            span = event["spans"][i]
            # A missing timestamp falls back to the current time.
            now = datetime.utcnow()
            start = span.get("start_timestamp") or now
            end = span.get("timestamp") or now
            duration = end - start
            description = serialized_span.get("description") or ""
            return (duration, -len(description))

        # Note: for simplicity we sort spans by exact duration and description
        # length. If ever needed, we could have a more involved heuristic, e.g.
        # replacing exact durations with "buckets" and/or looking at other span
        # properties.
        path.append("spans")
        for i, span in sorted(
            enumerate(serialized_event.get("spans") or []),
            key=shortest_duration_longest_description_first,
        ):
            description = span.get("description") or ""
            if len(description) <= min_length:
                continue
            excess_bytes -= len(description) - min_length
            # Point path at this description so that any metadata produced by
            # the truncation below is recorded at the right place.
            path.extend([i, "description"])
            # Note: the last time we call strip_string we could preserve a few
            # more bytes up to a total length of MAX_EVENT_BYTES. Since that's
            # not strictly required, we leave it out for now for simplicity.
            span["description"] = _flatten_annotated(
                strip_string(description, max_length=min_length)
            )
            del path[-2:]
            del meta_stack[len(path) + 1 :]

            if excess_bytes <= 0:
                break
        path.pop()
        del meta_stack[len(path) + 1 :]

    # NOTE(review): presumably keeps the SDK from capturing events while it is
    # serializing one — confirm against sentry_sdk.utils.disable_capture_event.
    disable_capture_event.set(True)
    try:
        rv = _serialize_node(event, **kwargs)
        if meta_stack and isinstance(rv, dict):
            rv["_meta"] = meta_stack[0]

        sum_span_description_bytes = sum(span_description_bytes)
        if smart_transaction_trimming and sum_span_description_bytes > 0:
            span_count = len(event.get("spans") or [])
            # This is an upper bound of how many bytes all descriptions would
            # consume if the usual string truncation in _serialize_node_impl
            # would have taken place, not accounting for the metadata attached
            # as event["_meta"].
            descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH

            # If by not truncating descriptions we ended up with more bytes than
            # per the usual string truncation, check if the event is too large
            # and we need to truncate some descriptions.
            #
            # This is guarded with an if statement to avoid JSON-encoding the
            # event unnecessarily.
            if sum_span_description_bytes > descriptions_budget_bytes:
                original_bytes = len(json_dumps(rv))
                excess_bytes = original_bytes - MAX_EVENT_BYTES
                if excess_bytes > 0:
                    # Event is too large, will likely be discarded by the
                    # server. Trim it down before sending.
                    _truncate_span_descriptions(rv, event, excess_bytes)

                    # Span descriptions truncated, set or reset _meta.
                    #
                    # We run the same code earlier because we want to account
                    # for _meta when calculating original_bytes, the number of
                    # bytes in the JSON-encoded event.
                    if meta_stack and isinstance(rv, dict):
                        rv["_meta"] = meta_stack[0]
        return rv
    finally:
        disable_capture_event.set(False)