Add an Admin API to query a piece of local or cached remote media by ID (#18911)

This commit is contained in:
Shay 2025-09-23 14:25:56 -07:00 committed by GitHub
parent 9680804496
commit 35c9cbb09d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 288 additions and 7 deletions

View File

@ -0,0 +1,2 @@
Add an Admin API that allows server admins to query and investigate the metadata of local or cached remote media via
the `origin/media_id` identifier found in a [Matrix Content URI](https://spec.matrix.org/v1.14/client-server-api/#matrix-content-mxc-uris).

View File

@ -39,6 +39,40 @@ the use of the
[List media uploaded by a user](user_admin_api.md#list-media-uploaded-by-a-user)
Admin API.
## Query a piece of media by ID
This API returns information about a piece of local or cached remote media given the origin server name and media ID. If
the requested local media does not exist, or the requested remote media is not cached, the endpoint will return 404.
Request:
```http
GET /_synapse/admin/v1/media/<origin>/<media_id>
```
The API returns a JSON body with media info like the following:
Response:
```json
{
"media_info": {
"media_origin": "remote.com",
"user_id": null,
"media_id": "sdginwegWEG",
"media_type": "image/png",
"media_length": 67,
"upload_name": "test.png",
"created_ts": 300,
"filesystem_id": "wgeweg",
"url_cache": null,
"last_access_ts": 400,
"quarantined_by": null,
"authenticated": false,
"safe_from_quarantine": null,
"sha256": "ebf4f635a17d10d6eb46ba680b70142419aa3220f228001a036d311a22ee9d2a"
}
}
```
# Quarantine media
Quarantining media means that it is marked as inaccessible by users. It applies

View File

@ -423,6 +423,23 @@ class MediaRepository:
send_cors=True,
)
async def get_cached_remote_media_info(
    self, origin: str, media_id: str
) -> Optional[RemoteMedia]:
    """
    Look up the cached metadata for a piece of remote media.

    Only the datastore is consulted: media that has not already been cached is
    never requested over federation, and is reported as missing instead.

    Args:
        origin: The server name the media originates from
        media_id: The ID of the media on its origin server

    Returns:
        The cached remote media info, or None if there is no cached entry for
        this origin/media ID combo
    """
    cached_media = await self.store.get_cached_remote_media(origin, media_id)
    return cached_media
async def get_local_media_info(
self, request: SynapseRequest, media_id: str, max_timeout_ms: int
) -> Optional[LocalMedia]:

View File

@ -18,7 +18,6 @@
# [This file includes modifications made by New Vector Limited]
#
#
import logging
from http import HTTPStatus
from typing import TYPE_CHECKING, Optional, Tuple
@ -41,7 +40,9 @@ from synapse.rest.admin._base import (
assert_requester_is_admin,
assert_user_is_admin,
)
from synapse.storage.databases.main.media_repository import MediaSortOrder
from synapse.storage.databases.main.media_repository import (
MediaSortOrder,
)
from synapse.types import JsonDict, UserID
if TYPE_CHECKING:
@ -50,6 +51,72 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
class QueryMediaById(RestServlet):
    """
    Admin endpoint returning the stored metadata for a single piece of media,
    addressed by the server name and media ID taken from the request path.

    Media belonging to this homeserver is read from the local media store; media
    from any other server is only looked up in the remote media cache.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)$")

    def __init__(self, hs: "HomeServer"):
        self.hs = hs
        self.server_name = hs.hostname
        self.store = hs.get_datastores().main
        self.auth = hs.get_auth()
        self.media_repo = hs.get_media_repository()

    async def on_GET(
        self, request: SynapseRequest, server_name: str, media_id: str
    ) -> Tuple[int, JsonDict]:
        """
        Return the metadata of the requested media wrapped in a `media_info` key.

        Both branches produce the same set of keys; fields that only exist for
        the other kind of media are filled in with None.

        Raises:
            NotFoundError: if the local media does not exist, or if the remote
                media is not cached.
        """
        requester = await self.auth.get_user_by_req(request)
        await assert_user_is_admin(self.auth, requester)

        if self.hs.is_mine_server_name(server_name):
            local = await self.store.get_local_media(media_id)
            if local is None:
                raise NotFoundError("Unknown media")

            local_info = {
                "media_origin": None,
                "user_id": local.user_id,
                "media_id": local.media_id,
                "media_type": local.media_type,
                "media_length": local.media_length,
                "upload_name": local.upload_name,
                "created_ts": local.created_ts,
                "filesystem_id": None,
                "url_cache": local.url_cache,
                "last_access_ts": local.last_access_ts,
                "quarantined_by": local.quarantined_by,
                "authenticated": local.authenticated,
                "safe_from_quarantine": local.safe_from_quarantine,
                "sha256": local.sha256,
            }
            return HTTPStatus.OK, {"media_info": local_info}

        # Not one of ours: only the cache is checked for the remote media.
        cached = await self.media_repo.get_cached_remote_media_info(
            server_name, media_id
        )
        if cached is None:
            raise NotFoundError("Unknown media")

        remote_info = {
            "media_origin": cached.media_origin,
            "user_id": None,
            "media_id": cached.media_id,
            "media_type": cached.media_type,
            "media_length": cached.media_length,
            "upload_name": cached.upload_name,
            "created_ts": cached.created_ts,
            "filesystem_id": cached.filesystem_id,
            "url_cache": None,
            "last_access_ts": cached.last_access_ts,
            "quarantined_by": cached.quarantined_by,
            "authenticated": cached.authenticated,
            "safe_from_quarantine": None,
            "sha256": cached.sha256,
        }
        return HTTPStatus.OK, {"media_info": remote_info}
class QuarantineMediaInRoom(RestServlet):
"""Quarantines all media in a room so that no one can download it via
this server.
@ -470,3 +537,4 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer)
DeleteMediaByDateSize(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
UserMediaRestServlet(hs).register(http_server)
QueryMediaById(hs).register(http_server)

View File

@ -29,8 +29,9 @@ from twisted.web.resource import Resource
import synapse.rest.admin
from synapse.api.errors import Codes
from synapse.media._base import FileInfo
from synapse.media.filepath import MediaFilePaths
from synapse.rest.client import login, profile, room
from synapse.rest.client import login, media, profile, room
from synapse.server import HomeServer
from synapse.util.clock import Clock
@ -47,6 +48,7 @@ class _AdminMediaTests(unittest.HomeserverTestCase):
synapse.rest.admin.register_servlets,
synapse.rest.admin.register_servlets_for_media_repo,
login.register_servlets,
media.register_servlets,
]
def create_resource_dict(self) -> Dict[str, Resource]:
@ -55,6 +57,164 @@ class _AdminMediaTests(unittest.HomeserverTestCase):
return resources
class QueryMediaByIDTestCase(_AdminMediaTests):
    """
    Tests for the `GET /_synapse/admin/v1/media/<origin>/<media_id>` Admin API.
    """

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.hs = hs
        self.clock = clock
        self.server_name = hs.hostname
        self.store = hs.get_datastores().main

        self.admin_user = self.register_user("admin", "pass", admin=True)
        self.admin_user_tok = self.login("admin", "pass")

    def _cache_remote_media(self, file_id: str) -> None:
        """
        Store `SMALL_PNG` as cached media from `remote.com` under the given ID,
        both on disk and in the database, then check that it can be downloaded.

        Args:
            file_id: Used as the media ID, the filesystem ID and the (dummy)
                sha256 of the cached remote media.
        """
        file_info = FileInfo(server_name="remote.com", file_id=file_id)

        media_storage = self.hs.get_media_repository().media_storage

        # Write the file contents by driving the async context manager by hand.
        ctx = media_storage.store_into_file(file_info)
        (f, fname) = self.get_success(ctx.__aenter__())
        f.write(SMALL_PNG)
        self.get_success(ctx.__aexit__(None, None, None))

        self.get_success(
            self.store.store_cached_remote_media(
                origin="remote.com",
                media_id=file_id,
                media_type="image/png",
                media_length=len(SMALL_PNG),
                time_now_ms=self.clock.time_msec(),
                upload_name="test.png",
                filesystem_id=file_id,
                sha256=file_id,
            )
        )

        channel = self.make_request(
            "GET",
            f"/_matrix/client/v1/media/download/remote.com/{file_id}",
            shorthand=False,
            access_token=self.admin_user_tok,
        )

        # Should be successful
        self.assertEqual(
            200,
            channel.code,
            msg=("Expected to receive a 200 on accessing media"),
        )

    def test_no_auth(self) -> None:
        """
        Try to query media without authentication.
        """
        url = f"/_synapse/admin/v1/media/{self.server_name}/12345"
        channel = self.make_request("GET", url)

        self.assertEqual(
            401,
            channel.code,
            msg=channel.json_body,
        )
        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])

    def test_requester_is_no_admin(self) -> None:
        """
        If the user is not a server admin, an error is returned.
        """
        self.other_user = self.register_user("user", "pass")
        self.other_user_token = self.login("user", "pass")

        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/{self.server_name}/12345",
            access_token=self.other_user_token,
        )

        self.assertEqual(403, channel.code, msg=channel.json_body)
        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])

    def test_local_media_does_not_exist(self) -> None:
        """
        Tests that a lookup for local media that does not exist returns a 404
        """
        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/{self.server_name}/12345",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(404, channel.code, msg=channel.json_body)
        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])

    def test_remote_media_does_not_exist(self) -> None:
        """
        Tests that a lookup for remote media that is not cached returns a 404
        """
        # Query a *remote* origin here: using our own server name would only
        # repeat the local lookup test and leave the remote branch uncovered.
        channel = self.make_request(
            "GET",
            "/_synapse/admin/v1/media/remote.com/12345",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(404, channel.code, msg=channel.json_body)
        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])

    def test_query_local_media(self) -> None:
        """
        Tests that querying an existing local media returns appropriate media info
        """
        # Upload some media to the local server
        response = self.helper.upload_media(
            SMALL_PNG,
            tok=self.admin_user_tok,
            expect_code=200,
        )
        # Extract media ID from the response
        server_and_media_id = response["content_uri"][6:]  # Cut off 'mxc://'
        server_name, media_id = server_and_media_id.split("/")
        self.assertEqual(server_name, self.server_name)

        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/{self.server_name}/{media_id}",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(200, channel.code, msg=channel.json_body)
        self.assertEqual(channel.json_body["media_info"]["authenticated"], True)
        self.assertEqual(channel.json_body["media_info"]["media_id"], media_id)
        self.assertEqual(
            channel.json_body["media_info"]["media_length"], len(SMALL_PNG)
        )
        # NOTE(review): presumably the upload helper does not send an image
        # content type, hence "application/json" rather than "image/png" -
        # confirm against the helper.
        self.assertEqual(
            channel.json_body["media_info"]["media_type"], "application/json"
        )
        self.assertEqual(channel.json_body["media_info"]["upload_name"], "test.png")
        self.assertEqual(channel.json_body["media_info"]["user_id"], "@admin:test")

    def test_query_remote_media(self) -> None:
        """
        Tests that querying cached remote media returns appropriate media info
        """
        file_id = "abcdefg12345"
        self._cache_remote_media(file_id)

        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/remote.com/{file_id}",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(200, channel.code, msg=channel.json_body)
        self.assertEqual(channel.json_body["media_info"]["authenticated"], True)
        self.assertEqual(channel.json_body["media_info"]["media_id"], file_id)
        self.assertEqual(
            channel.json_body["media_info"]["media_length"], len(SMALL_PNG)
        )
        self.assertEqual(channel.json_body["media_info"]["media_type"], "image/png")
        self.assertEqual(channel.json_body["media_info"]["upload_name"], "test.png")
        self.assertEqual(channel.json_body["media_info"]["media_origin"], "remote.com")
class DeleteMediaByIDTestCase(_AdminMediaTests):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.server_name = hs.hostname
@ -710,8 +870,8 @@ class QuarantineMediaByIDTestCase(_AdminMediaTests):
self.assertFalse(channel.json_body)
# Test that ALL similar media was quarantined.
for media in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media))
for media_item in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media_item))
assert media_info is not None
self.assertTrue(media_info.quarantined_by)
@ -731,8 +891,8 @@ class QuarantineMediaByIDTestCase(_AdminMediaTests):
self.assertFalse(channel.json_body)
# Test that ALL similar media is now reset.
for media in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media))
for media_item in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media_item))
assert media_info is not None
self.assertFalse(media_info.quarantined_by)