ActivityPub Viewer

A small tool to view real-world ActivityPub objects as JSON! Enter a URL or username from Mastodon or a similar service below, and we'll send a request with the right Accept header to the server to view the underlying object.

Open in browser →
{ "@context": [ "https://www.w3.org/ns/activitystreams", { "ostatus": "http://ostatus.org#", "atomUri": "ostatus:atomUri", "inReplyToAtomUri": "ostatus:inReplyToAtomUri", "conversation": "ostatus:conversation", "sensitive": "as:sensitive", "toot": "http://joinmastodon.org/ns#", "votersCount": "toot:votersCount", "blurhash": "toot:blurhash", "focalPoint": { "@container": "@list", "@id": "toot:focalPoint" }, "Hashtag": "as:Hashtag" } ], "id": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161", "type": "Note", "summary": null, "inReplyTo": null, "published": "2025-02-22T20:27:12Z", "url": "https://mast.hpc.social/@ProjectPhysX/114049419373978161", "attributedTo": "https://mast.hpc.social/users/ProjectPhysX", "to": [ "https://www.w3.org/ns/activitystreams#Public" ], "cc": [ "https://mast.hpc.social/users/ProjectPhysX/followers" ], "sensitive": false, "atomUri": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161", "inReplyToAtomUri": null, "conversation": "tag:mast.hpc.social,2025-02-22:objectId=15573865:objectType=Conversation", "content": "<p>My OpenCL-Benchmark now uses the dp4a instruction on supported hardware (<a href=\"https://mast.hpc.social/tags/Nvidia\" class=\"mention hashtag\" rel=\"tag\">#<span>Nvidia</span></a> Pascal, <a href=\"https://mast.hpc.social/tags/Intel\" class=\"mention hashtag\" rel=\"tag\">#<span>Intel</span></a> <a href=\"https://mast.hpc.social/tags/Arc\" class=\"mention hashtag\" rel=\"tag\">#<span>Arc</span></a>, <a href=\"https://mast.hpc.social/tags/AMD\" class=\"mention hashtag\" rel=\"tag\">#<span>AMD</span></a> RDNA, or newer) to benchmark INT8 throughput.<br />dp4a is not exposed in <a href=\"https://mast.hpc.social/tags/OpenCL\" class=\"mention hashtag\" rel=\"tag\">#<span>OpenCL</span></a> C, but can still be used via inline PTX assembly and compiler pattern recognition. Even Nvidia&#39;s compiler will turn the emulation implementation into dp4a, but in some cases does so with a bunch of unnecessary shifts/permutations on inputs, so better use inline PTX directly. 🖖🧐<br /><a href=\"https://github.com/ProjectPhysX/OpenCL-Benchmark/releases/tag/v1.8\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">github.com/ProjectPhysX/OpenCL</span><span class=\"invisible\">-Benchmark/releases/tag/v1.8</span></a></p>", "contentMap": { "en": "<p>My OpenCL-Benchmark now uses the dp4a instruction on supported hardware (<a href=\"https://mast.hpc.social/tags/Nvidia\" class=\"mention hashtag\" rel=\"tag\">#<span>Nvidia</span></a> Pascal, <a href=\"https://mast.hpc.social/tags/Intel\" class=\"mention hashtag\" rel=\"tag\">#<span>Intel</span></a> <a href=\"https://mast.hpc.social/tags/Arc\" class=\"mention hashtag\" rel=\"tag\">#<span>Arc</span></a>, <a href=\"https://mast.hpc.social/tags/AMD\" class=\"mention hashtag\" rel=\"tag\">#<span>AMD</span></a> RDNA, or newer) to benchmark INT8 throughput.<br />dp4a is not exposed in <a href=\"https://mast.hpc.social/tags/OpenCL\" class=\"mention hashtag\" rel=\"tag\">#<span>OpenCL</span></a> C, but can still be used via inline PTX assembly and compiler pattern recognition. Even Nvidia&#39;s compiler will turn the emulation implementation into dp4a, but in some cases does so with a bunch of unnecessary shifts/permutations on inputs, so better use inline PTX directly. 🖖🧐<br /><a href=\"https://github.com/ProjectPhysX/OpenCL-Benchmark/releases/tag/v1.8\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">github.com/ProjectPhysX/OpenCL</span><span class=\"invisible\">-Benchmark/releases/tag/v1.8</span></a></p>" }, "attachment": [ { "type": "Document", "mediaType": "image/png", "url": "https://mast.hpc.social/system/media_attachments/files/114/049/395/681/477/174/original/f04ea73c2c9037fc.png", "name": "dp4a implementation in OpenCL, using either inline PTX assembly on Nvidia GPUs with at least compute capability 6.1, or fallback emulation which compilers may turn into dp4a via pattern recognition.", "blurhash": "U04ec$~X$-x[?axIV_bY009ZbCWA0MOSxsep", "focalPoint": [ -1, 0.01 ], "width": 2125, "height": 546 }, { "type": "Document", "mediaType": "image/png", "url": "https://mast.hpc.social/system/media_attachments/files/114/049/396/095/194/040/original/bcb29ae2af56ab24.png", "name": "INT8 benchmark on Nvidia H100 SXM5 80GB HBM3. dp4a ~quadruples INT8 throughput over char4 multiplication/addition.", "blurhash": "U16kbW~q~qIAE2%N9Ft8Ek-;IUD%#5Rj-;j?", "focalPoint": [ 0, 0 ], "width": 1102, "height": 967 } ], "tag": [ { "type": "Hashtag", "href": "https://mast.hpc.social/tags/nvidia", "name": "#nvidia" }, { "type": "Hashtag", "href": "https://mast.hpc.social/tags/intel", "name": "#intel" }, { "type": "Hashtag", "href": "https://mast.hpc.social/tags/arc", "name": "#arc" }, { "type": "Hashtag", "href": "https://mast.hpc.social/tags/amd", "name": "#amd" }, { "type": "Hashtag", "href": "https://mast.hpc.social/tags/opencl", "name": "#opencl" } ], "replies": { "id": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161/replies", "type": "Collection", "first": { "type": "CollectionPage", "next": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161/replies?only_other_accounts=true&page=true", "partOf": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161/replies", "items": [] } }, "likes": { "id": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161/likes", "type": "Collection", "totalItems": 5 }, "shares": { "id": "https://mast.hpc.social/users/ProjectPhysX/statuses/114049419373978161/shares", "type": "Collection", "totalItems": 1 } }