Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions apod/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
import re

import requests
import urllib3
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# import urllib.request

Expand All @@ -24,8 +25,38 @@
# location of backing APOD service
BASE = "https://apod.nasa.gov/apod/"

# Create urllib3 Pool Manager
http = urllib3.PoolManager()
# apod.nasa.gov intermittently hangs or returns 5xx errors; without a timeout
# a stuck upstream connection holds a worker indefinitely, and without retries
# a single transient failure becomes a hard error for the client.
# Hung connections are bounded so the request cycle fits within gunicorn's
# default 30s worker timeout: connect hangs cost at most ~16s (3 attempts of
# 5s plus backoff) and read hangs at most ~26s (2 attempts of 13s). Transient
# 5xx responses normally arrive quickly, so retrying them adds little latency.
CONNECT_TIMEOUT = 5 # seconds to establish the upstream connection
READ_TIMEOUT = 8 # seconds to wait for the upstream response
REQUEST_TIMEOUT = (CONNECT_TIMEOUT, READ_TIMEOUT)

# Shared session so outbound requests reuse pooled connections and retry
# transient upstream failures with exponential backoff. Retry-After headers
# are deliberately ignored: honoring a long server-specified delay on a
# 429/503 would sleep the worker past its timeout. raise_on_status=False
# hands the final response back so the caller decides how to surface it.
session = requests.Session()
session.mount(
"https://",
HTTPAdapter(
max_retries=Retry(
total=3,
connect=2,
read=1,
backoff_factor=0.5,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["GET"],
raise_on_status=False,
respect_retry_after_header=False,
)
),
)
Comment on lines +44 to +59

# function for decoding response text into utf-8 or utf-16
def _decode_response_text(res):
Expand Down Expand Up @@ -68,10 +99,12 @@ def _get_thumbs(data):
vimeo_id_regex = re.compile(r"(?:/video/)(\d+)")
vimeo_id = vimeo_id_regex.findall(data)[0]
# make an API call to get thumbnail URL
vimeo_request = http.request(
"GET", "https://vimeo.com/api/v2/video/" + vimeo_id + ".json"
vimeo_request = session.get(
"https://vimeo.com/api/v2/video/" + vimeo_id + ".json",
timeout=REQUEST_TIMEOUT,
)
data = json.loads(vimeo_request.data.decode("utf-8"))
vimeo_request.raise_for_status()
data = vimeo_request.json()
video_thumb = data[0]["thumbnail_large"]
else:
# the thumbs parameter is True, but the APOD for the date is not a video, output nothing
Expand All @@ -94,8 +127,7 @@ def _get_apod_chars(dt, thumbs):
else:
apod_url = "%sastropix.html" % BASE
LOG.debug("OPENING URL:" + apod_url)
res = requests.get(apod_url)
page_text = _decode_response_text(res)
res = session.get(apod_url, timeout=REQUEST_TIMEOUT)
if res.status_code == 404:
return None
# LOG.error(f'No APOD entry for URL: {apod_url}')
Expand All @@ -108,6 +140,11 @@ def _get_apod_chars(dt, thumbs):

# return default_obj_props

# surface upstream failures that persist after retries as a clear error
# instead of attempting to parse an error page
res.raise_for_status()
page_text = _decode_response_text(res)

soup = BeautifulSoup(page_text, "html.parser")
LOG.debug("getting the data url")
hd_data = None
Expand Down
124 changes: 124 additions & 0 deletions tests/apod/test_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# coding= utf-8
import unittest
from datetime import datetime
from unittest.mock import MagicMock, patch

import requests

from apod import utility


def _mock_response(status_code=200, content=b"", apparent_encoding="utf-8"):
res = MagicMock(spec=requests.Response)
res.status_code = status_code
res.content = content
res.text = content.decode("utf-8", errors="replace")
res.apparent_encoding = apparent_encoding
if status_code >= 400:
res.raise_for_status.side_effect = requests.HTTPError(
"%s Error" % status_code, response=res
)
else:
res.raise_for_status.return_value = None
return res


class TestOutboundRequests(unittest.TestCase):
"""Offline tests for timeout/retry hardening of requests made to the
backing APOD service. Regression tests for connection timeout issues
(issue #163)."""

def test_apod_page_request_uses_timeout(self):
"""The request for the APOD page must carry an explicit timeout so a
hung upstream connection cannot hold a worker indefinitely."""
with patch.object(utility, "session") as mock_session:
mock_session.get.return_value = _mock_response(status_code=404)
result = utility._get_apod_chars(datetime(2020, 1, 1), thumbs=False)

self.assertIsNone(result)
_, kwargs = mock_session.get.call_args
self.assertEqual(kwargs.get("timeout"), utility.REQUEST_TIMEOUT)

def test_missing_page_returns_none(self):
"""A 404 from the backing service must still map to None (which the
application layer turns into a 404 response)."""
with patch.object(utility, "session") as mock_session:
mock_session.get.return_value = _mock_response(status_code=404)
self.assertIsNone(
utility._get_apod_chars(datetime(2020, 1, 1), thumbs=False)
)

def test_persistent_upstream_error_raises(self):
"""A 5xx that survives the retries must raise instead of handing an
upstream error page to the parser."""
with patch.object(utility, "session") as mock_session:
mock_session.get.return_value = _mock_response(status_code=503)
with self.assertRaises(requests.HTTPError):
utility._get_apod_chars(datetime(2020, 1, 1), thumbs=False)

def test_session_is_configured_to_retry_transient_failures(self):
"""The shared session must retry transient upstream failures with
backoff before giving up."""
adapter = utility.session.get_adapter(utility.BASE)
retries = adapter.max_retries

self.assertGreaterEqual(retries.total, 1)
self.assertGreater(retries.backoff_factor, 0)
for status in (500, 502, 503, 504):
self.assertIn(status, retries.status_forcelist)
self.assertIn("GET", retries.allowed_methods)
# raise_on_status=False is load-bearing: it makes the session hand the
# final 5xx response back so raise_for_status() in _get_apod_chars is
# what surfaces the error (instead of urllib3 raising RetryError).
self.assertFalse(retries.raise_on_status)
# Retry-After must be ignored: a server-specified delay on a 429/503
# would otherwise sleep the worker past gunicorn's timeout.
self.assertFalse(retries.respect_retry_after_header)

def test_hung_connection_budget_fits_worker_timeout(self):
"""Worst-case wall-clock for hung upstream connections must stay
below gunicorn's default 30s worker timeout, otherwise the worker is
killed mid-retry and the client sees a dropped connection."""
retries = utility.session.get_adapter(utility.BASE).max_retries

def backoff_sleeps(num_retries):
# urllib3 2.x sleeps 0 before the first retry, then
# backoff_factor * 2^(n-1) before retry n
return sum(
retries.backoff_factor * (2**n) for n in range(1, num_retries)
)

connect_attempts = min(retries.connect, retries.total) + 1
worst_connect_hang = (
connect_attempts * utility.CONNECT_TIMEOUT
+ backoff_sleeps(connect_attempts - 1)
)
self.assertLess(worst_connect_hang, 30)

read_attempts = min(retries.read, retries.total) + 1
worst_read_hang = read_attempts * (
utility.CONNECT_TIMEOUT + utility.READ_TIMEOUT
) + backoff_sleeps(read_attempts - 1)
self.assertLess(worst_read_hang, 30)

def test_vimeo_thumbnail_request_uses_timeout(self):
"""The Vimeo thumbnail lookup must also carry an explicit timeout."""
vimeo_response = _mock_response()
vimeo_response.json.return_value = [
{"thumbnail_large": "https://i.vimeocdn.com/video/foo_640.jpg"}
]
with patch.object(utility, "session") as mock_session:
mock_session.get.return_value = vimeo_response
thumb = utility._get_thumbs("https://player.vimeo.com/video/12345")

self.assertEqual(thumb, "https://i.vimeocdn.com/video/foo_640.jpg")
_, kwargs = mock_session.get.call_args
self.assertEqual(kwargs.get("timeout"), utility.REQUEST_TIMEOUT)

def test_vimeo_thumbnail_upstream_error_raises(self):
"""A persistent upstream error from the Vimeo API must surface as a
clear HTTPError instead of a JSON parsing failure on an error page."""
with patch.object(utility, "session") as mock_session:
mock_session.get.return_value = _mock_response(status_code=503)
with self.assertRaises(requests.HTTPError):
utility._get_thumbs("https://player.vimeo.com/video/12345")
1 change: 0 additions & 1 deletion tests/apod/test_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/bin/sh/python
# coding= utf-8
import logging
import unittest
Expand Down
1 change: 0 additions & 1 deletion tests/apod/test_utility.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/bin/sh/python
# coding= utf-8
import logging
import unittest
Expand Down