Hydrating Posts from Reddit

To hydrate Reddit posts from the output of the AIReD API, this Jupyter notebook can be used.

For simpler use cases, the following Python script, which does not require a Reddit API key, can be used (substitute the AIReD results into the URLS list):

import re
import requests

# Reddit comment permalinks to hydrate. Replace these sample entries with the
# URLs returned by AIReD. Each entry has the form
# https://www.reddit.com/r/<subreddit>/comments/<post_id>/<post_slug>/<comment_id>/
URLS = [
    "https://www.reddit.com/r/melbourne/comments/v1m5xm/definately_a_melbourne_hipster_set_up_shop_in/iaqx1gd/",
    "https://www.reddit.com/r/melbourne/comments/yp5lo0/why_is_the_fbi_hiring_in_melbourne/ivij7pt/",
    "https://www.reddit.com/r/melbourne/comments/v9m075/spotted_this_today_in_san_francisco_old_melbourne/ibyofoj/",
    "https://www.reddit.com/r/melbourne/comments/1hsy65v/thanks_for_having_me_melbourne_youre_alright/m5afff4/",
    "https://www.reddit.com/r/melbourne/comments/1agjvp5/i_think_they_want_us_to_die/kohc5zz/",
    "https://www.reddit.com/r/melbourne/comments/16fonq7/are_you_really_in_melbourne/k031riq/",
    "https://www.reddit.com/r/melbourne/comments/1p8ceuu/the_first_day_of_summer_melbourne_why_are_you/nr5vtxm/",
    "https://www.reddit.com/r/melbourne/comments/qpqtd8/just_witnessed_this_on_the_tan/hjw2koh/",
    "https://www.reddit.com/r/melbourne/comments/upc4nm/just_wondering_what_this_was_for/i8kdbmc/",
    "https://www.reddit.com/r/melbourne/comments/1c477dc/it_cost_100m_to_build_but_would_fetch_less_than/kzmlaab/",
    "https://www.reddit.com/r/melbourne/comments/op53nh/more_melbourne_doors_you_say_i_got_you/h63ru3v/",
    "https://www.reddit.com/r/melbourne/comments/1p57gcx/hot_dogs_in_melbourne/nqhd4z1/",
    "https://www.reddit.com/r/melbourne/comments/1ionqcp/its_the_rmelbourne_daily_discussion_thread_friday/mcll7s7/",
    "https://www.reddit.com/r/melbourne/comments/1f5nm1r/homm_melbourne_central/lkwqxof/",
    "https://www.reddit.com/r/melbourne/comments/1apl4kc/the_view_from_moonee_panns_just_before_the_rain/kq6xcp8/",
    "https://www.reddit.com/r/melbourne/comments/17pjcla/do_people_really_feel_cold_today/k86hxcz/",
    "https://www.reddit.com/r/melbourne/comments/17f9km7/wtf_melbourne/k68ez7c/",
    "https://www.reddit.com/r/melbourne/comments/wzovwr/poster_in_northcote_identifying_wannabe_nazis/im3qq3c/",
    "https://www.reddit.com/r/melbourne/comments/p6nyih/melbourne_to_all_of_nsw/h9gr2af/",
    "https://www.reddit.com/r/melbourne/comments/qdr3ao/the_best_statue_in_all_of_toorak/hhpa5j2/",
    "https://www.reddit.com/r/melbourne/comments/1g0baci/melbourne_in_the_firing_line/lr7l7uj/",
    "https://www.reddit.com/r/melbourne/comments/wslt23/describe_your_suburb_in_one_sentence_and_let/il02g8y/",
    "https://www.reddit.com/r/sydney/comments/1366im2/moved_here_from_melbourne/jinfaib/",
    "https://www.reddit.com/r/melbourne/comments/1f2d58k/hooning_in_north_melb_recently/lk5x7lm/",
    "https://www.reddit.com/r/perth/comments/1dzqnt2/perth_woman_allegedly_tries_to_shove_bag_of_meth/lcidawg/"
]


def extract_comment_id(url: str) -> str:
    """
    Extract the comment ID from a Reddit comment permalink.

    The ID is the path segment that follows
    ``/comments/<post_id>/<post_slug>/``. A trailing slash, query string
    (``?context=3``) or fragment (``#...``) after the ID is ignored.

    Args:
        url: Reddit comment permalink.

    Returns:
        The comment ID, e.g. ``"iaqx1gd"``.

    Raises:
        ValueError: If the URL has no comment-ID segment after the post slug.
    """
    # Exclude "?" and "#" from every segment so a query string or fragment
    # can never be captured as (or as part of) the comment ID.
    m = re.search(r"/comments/[^/?#]+/[^/?#]+/([^/?#]+)", url)
    if not m:
        raise ValueError(f"Could not extract comment ID from URL: {url}")
    return m.group(1)


def fetch_comment_text(url: str, timeout: float = 10.0) -> str:
    """
    Fetch just the text of the comment using Reddit's public JSON endpoint.

    Args:
        url: Reddit comment permalink. Any query string or fragment is
            dropped before the ``.json`` suffix is appended.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        The comment's ``body`` field (``""`` if the field is absent), or
        ``"[ERROR: could not parse comment]"`` when the JSON payload does
        not have the expected listing structure.

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``).
        requests.RequestException: For connection failures or timeouts.
        Any exception raised by ``resp.json()`` on a non-JSON body also
        propagates unchanged.
    """
    # Drop "#fragment" and "?query" first; otherwise they would end up in
    # front of the ".json" suffix and produce a wrong endpoint URL.
    base = url.split("#", 1)[0].split("?", 1)[0]
    json_url = base.rstrip("/") + ".json"

    resp = requests.get(
        json_url,
        headers={"User-Agent": "python:comment.fetcher:v1.0"},
        # Without a timeout, requests waits indefinitely on a stalled server.
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    # Reddit returns:
    # [0] = submission listing
    # [1] = comment listing
    try:
        comment = data[1]["data"]["children"][0]["data"]
        return comment.get("body", "")
    except (KeyError, IndexError, TypeError, AttributeError):
        # Payload did not have the expected shape (e.g. an empty comment
        # listing); keep the best-effort sentinel instead of raising.
        return "[ERROR: could not parse comment]"


if __name__ == "__main__":
    # Hydrate every permalink in turn: print a header line carrying the
    # comment ID, then the comment text, then one blank separator line.
    for permalink in URLS:
        comment_id = extract_comment_id(permalink)
        body = fetch_comment_text(permalink)
        print(f"--- {comment_id} ---", body, "", sep="\n")