diff --git a/changelog.d/18911.feature b/changelog.d/18911.feature new file mode 100644 index 0000000000..ac576e2913 --- /dev/null +++ b/changelog.d/18911.feature @@ -0,0 +1,2 @@ +Add an Admin API that allows server admins to query and investigate the metadata of local or cached remote media via +the `origin/media_id` identifier found in a [Matrix Content URI](https://spec.matrix.org/v1.14/client-server-api/#matrix-content-mxc-uris). \ No newline at end of file diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 1177711c1e..be72b2e3e2 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -39,6 +39,40 @@ the use of the [List media uploaded by a user](user_admin_api.md#list-media-uploaded-by-a-user) Admin API. +## Query a piece of media by ID + +This API returns information about a piece of local or cached remote media given the origin server name and media id. If +information is requested for remote media which is not cached, the endpoint will return 404. + +Request: +```http +GET /_synapse/admin/v1/media/<origin>/<media_id> +``` + +The API returns a JSON body with media info like the following: + +Response: +```json +{ + "media_info": { + "media_origin": "remote.com", + "user_id": null, + "media_id": "sdginwegWEG", + "media_type": "image/png", + "media_length": 67, + "upload_name": "test.png", + "created_ts": 300, + "filesystem_id": "wgeweg", + "url_cache": null, + "last_access_ts": 400, + "quarantined_by": null, + "authenticated": false, + "safe_from_quarantine": null, + "sha256": "ebf4f635a17d10d6eb46ba680b70142419aa3220f228001a036d311a22ee9d2a" + } +} +``` + # Quarantine media Quarantining media means that it is marked as inaccessible by users. 
It applies diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 54791f43a7..a3c0b3036e 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -423,6 +423,23 @@ class MediaRepository: send_cors=True, ) + async def get_cached_remote_media_info( + self, origin: str, media_id: str + ) -> Optional[RemoteMedia]: + """ + Get cached remote media info for a given origin/media ID combo. If the requested + media is not found locally, it will not be requested over federation and the + call will return None. + + Args: + origin: The origin of the remote media + media_id: The media ID of the requested content + + Returns: + The info for the cached remote media or None if it was not found + """ + return await self.store.get_cached_remote_media(origin, media_id) + async def get_local_media_info( self, request: SynapseRequest, media_id: str, max_timeout_ms: int ) -> Optional[LocalMedia]: diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 195f22a4c2..8732c0bf9d 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -18,7 +18,6 @@ # [This file includes modifications made by New Vector Limited] # # - import logging from http import HTTPStatus from typing import TYPE_CHECKING, Optional, Tuple @@ -41,7 +40,9 @@ from synapse.rest.admin._base import ( assert_requester_is_admin, assert_user_is_admin, ) -from synapse.storage.databases.main.media_repository import MediaSortOrder +from synapse.storage.databases.main.media_repository import ( + MediaSortOrder, +) from synapse.types import JsonDict, UserID if TYPE_CHECKING: @@ -50,6 +51,72 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +class QueryMediaById(RestServlet): + """ + Fetch info about a piece of local or cached remote media. 
+ """ + + PATTERNS = admin_patterns("/media/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)$") + + def __init__(self, hs: "HomeServer"): + self.store = hs.get_datastores().main + self.auth = hs.get_auth() + self.server_name = hs.hostname + self.hs = hs + self.media_repo = hs.get_media_repository() + + async def on_GET( + self, request: SynapseRequest, server_name: str, media_id: str + ) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester) + + if not self.hs.is_mine_server_name(server_name): + remote_media_info = await self.media_repo.get_cached_remote_media_info( + server_name, media_id + ) + if remote_media_info is None: + raise NotFoundError("Unknown media") + resp = { + "media_origin": remote_media_info.media_origin, + "user_id": None, + "media_id": remote_media_info.media_id, + "media_type": remote_media_info.media_type, + "media_length": remote_media_info.media_length, + "upload_name": remote_media_info.upload_name, + "created_ts": remote_media_info.created_ts, + "filesystem_id": remote_media_info.filesystem_id, + "url_cache": None, + "last_access_ts": remote_media_info.last_access_ts, + "quarantined_by": remote_media_info.quarantined_by, + "authenticated": remote_media_info.authenticated, + "safe_from_quarantine": None, + "sha256": remote_media_info.sha256, + } + else: + local_media_info = await self.store.get_local_media(media_id) + if local_media_info is None: + raise NotFoundError("Unknown media") + resp = { + "media_origin": None, + "user_id": local_media_info.user_id, + "media_id": local_media_info.media_id, + "media_type": local_media_info.media_type, + "media_length": local_media_info.media_length, + "upload_name": local_media_info.upload_name, + "created_ts": local_media_info.created_ts, + "filesystem_id": None, + "url_cache": local_media_info.url_cache, + "last_access_ts": local_media_info.last_access_ts, + "quarantined_by": local_media_info.quarantined_by, + "authenticated": 
local_media_info.authenticated, + "safe_from_quarantine": local_media_info.safe_from_quarantine, + "sha256": local_media_info.sha256, + } + + return HTTPStatus.OK, {"media_info": resp} + + class QuarantineMediaInRoom(RestServlet): """Quarantines all media in a room so that no one can download it via this server. @@ -470,3 +537,4 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer) DeleteMediaByDateSize(hs).register(http_server) DeleteMediaByID(hs).register(http_server) UserMediaRestServlet(hs).register(http_server) + QueryMediaById(hs).register(http_server) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 3bf9f67e09..86c09634cc 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -29,8 +29,9 @@ from twisted.web.resource import Resource import synapse.rest.admin from synapse.api.errors import Codes +from synapse.media._base import FileInfo from synapse.media.filepath import MediaFilePaths -from synapse.rest.client import login, profile, room +from synapse.rest.client import login, media, profile, room from synapse.server import HomeServer from synapse.util.clock import Clock @@ -47,6 +48,7 @@ class _AdminMediaTests(unittest.HomeserverTestCase): synapse.rest.admin.register_servlets, synapse.rest.admin.register_servlets_for_media_repo, login.register_servlets, + media.register_servlets, ] def create_resource_dict(self) -> Dict[str, Resource]: @@ -55,6 +57,164 @@ class _AdminMediaTests(unittest.HomeserverTestCase): return resources +class QueryMediaByIDTestCase(_AdminMediaTests): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.hs = hs + self.clock = clock + self.server_name = hs.hostname + self.store = hs.get_datastores().main + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + def _cache_remote_media(self, file_id: str) -> None: + file_info = 
FileInfo(server_name="remote.com", file_id=file_id) + + media_storage = self.hs.get_media_repository().media_storage + + ctx = media_storage.store_into_file(file_info) + (f, fname) = self.get_success(ctx.__aenter__()) + f.write(SMALL_PNG) + self.get_success(ctx.__aexit__(None, None, None)) + + self.get_success( + self.store.store_cached_remote_media( + origin="remote.com", + media_id=file_id, + media_type="image/png", + media_length=len(SMALL_PNG), + time_now_ms=self.clock.time_msec(), + upload_name="test.png", + filesystem_id=file_id, + sha256=file_id, + ) + ) + + channel = self.make_request( + "GET", + f"/_matrix/client/v1/media/download/remote.com/{file_id}", + shorthand=False, + access_token=self.admin_user_tok, + ) + + # Should be successful + self.assertEqual( + 200, + channel.code, + msg=("Expected to receive a 200 on accessing media"), + ) + + def test_no_auth(self) -> None: + """ + Try to query media without authentication. + """ + url = f"/_synapse/admin/v1/media/{self.server_name}/12345" + channel = self.make_request("GET", url) + + self.assertEqual( + 401, + channel.code, + msg=channel.json_body, + ) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self) -> None: + """ + If the user is not a server admin, an error is returned. 
+ """ + self.other_user = self.register_user("user", "pass") + self.other_user_token = self.login("user", "pass") + + channel = self.make_request( + "GET", + f"/_synapse/admin/v1/media/{self.server_name}/12345", + access_token=self.other_user_token, + ) + + self.assertEqual(403, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_local_media_does_not_exist(self) -> None: + """ + Tests that a lookup for local media that does not exist returns a 404 + """ + channel = self.make_request( + "GET", + f"/_synapse/admin/v1/media/{self.server_name}/12345", + access_token=self.admin_user_tok, + ) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_remote_media_does_not_exist(self) -> None: + """ + Tests that a lookup for remote media that is not cached returns a 404 + """ + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/remote.com/12345", + access_token=self.admin_user_tok, + ) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_query_local_media(self) -> None: + """ + Tests that querying an existing local media returns appropriate media info + """ + + # Upload some media into the room + response = self.helper.upload_media( + SMALL_PNG, + tok=self.admin_user_tok, + expect_code=200, + ) + # Extract media ID from the response + server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + server_name, media_id = server_and_media_id.split("/") + self.assertEqual(server_name, self.server_name) + + channel = self.make_request( + "GET", + f"/_synapse/admin/v1/media/{self.server_name}/{media_id}", + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["media_info"]["authenticated"], True) + 
self.assertEqual(channel.json_body["media_info"]["media_id"], media_id) + self.assertEqual( + channel.json_body["media_info"]["media_length"], len(SMALL_PNG) + ) + self.assertEqual( + channel.json_body["media_info"]["media_type"], "application/json" + ) + self.assertEqual(channel.json_body["media_info"]["upload_name"], "test.png") + self.assertEqual(channel.json_body["media_info"]["user_id"], "@admin:test") + + def test_query_remote_media(self) -> None: + file_id = "abcdefg12345" + self._cache_remote_media(file_id) + + channel = self.make_request( + "GET", + f"/_synapse/admin/v1/media/remote.com/{file_id}", + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["media_info"]["authenticated"], True) + self.assertEqual(channel.json_body["media_info"]["media_id"], file_id) + self.assertEqual( + channel.json_body["media_info"]["media_length"], len(SMALL_PNG) + ) + self.assertEqual(channel.json_body["media_info"]["media_type"], "image/png") + self.assertEqual(channel.json_body["media_info"]["upload_name"], "test.png") + self.assertEqual(channel.json_body["media_info"]["media_origin"], "remote.com") + + class DeleteMediaByIDTestCase(_AdminMediaTests): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.server_name = hs.hostname @@ -710,8 +870,8 @@ class QuarantineMediaByIDTestCase(_AdminMediaTests): self.assertFalse(channel.json_body) # Test that ALL similar media was quarantined. 
- for media in [self.media_id, self.media_id_2, self.media_id_3]: - media_info = self.get_success(self.store.get_local_media(media)) + for media_item in [self.media_id, self.media_id_2, self.media_id_3]: + media_info = self.get_success(self.store.get_local_media(media_item)) assert media_info is not None self.assertTrue(media_info.quarantined_by) @@ -731,8 +891,8 @@ class QuarantineMediaByIDTestCase(_AdminMediaTests): self.assertFalse(channel.json_body) # Test that ALL similar media is now reset. - for media in [self.media_id, self.media_id_2, self.media_id_3]: - media_info = self.get_success(self.store.get_local_media(media)) + for media_item in [self.media_id, self.media_id_2, self.media_id_3]: + media_info = self.get_success(self.store.get_local_media(media_item)) assert media_info is not None self.assertFalse(media_info.quarantined_by)