Source: lib/transmuxer/h264.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.transmuxer.H264');
  7. goog.require('shaka.util.ExpGolomb');
  8. goog.require('shaka.util.Uint8ArrayUtils');
  /**
   * H.264 utils: extracts video properties from SPS/PPS NAL units and groups
   * the NAL units found in PES packets into video samples.
   */
  shaka.transmuxer.H264 = class {
    /**
     * Read a sequence parameter set and return some interesting video
     * properties. A sequence parameter set is the H264 metadata that
     * describes the properties of upcoming video frames.
     *
     * The first SPS and the first PPS found in |nalus| are used. The SPS
     * payload (|data|) is bit-parsed to compute the cropped frame size and the
     * sample aspect ratio; the full NAL units (|fullData|) are copied into the
     * returned |videoConfig|, which holds exactly one SPS and one PPS, each
     * prefixed by a 16-bit big-endian length.
     *
     * @param {!Array<shaka.extern.VideoNalu>} nalus
     * @return {?{
     *   height: number,
     *   width: number,
     *   videoConfig: !Uint8Array,
     *   hSpacing: number,
     *   vSpacing: number,
     * }} null when |nalus| is empty or has no SPS or no PPS.
     */
    static parseInfo(nalus) {
      const H264 = shaka.transmuxer.H264;

      if (!nalus.length) {
        return null;
      }

      const spsNalu = nalus.find((nalu) => {
        return nalu.type === H264.NALU_TYPE_SPS_;
      });
      const ppsNalu = nalus.find((nalu) => {
        return nalu.type === H264.NALU_TYPE_PPS_;
      });
      if (!spsNalu || !ppsNalu) {
        return null;
      }

      const expGolombDecoder = new shaka.util.ExpGolomb(spsNalu.data);
      // profile_idc
      const profileIdc = expGolombDecoder.readUnsignedByte();
      // constraint_set[0-5]_flag
      expGolombDecoder.readUnsignedByte();
      // level_idc u(8)
      expGolombDecoder.readUnsignedByte();
      // seq_parameter_set_id
      expGolombDecoder.skipExpGolomb();

      // chroma_format_idc is inferred to be 1 (4:2:0) and
      // separate_colour_plane_flag to be 0 when they are not present in the
      // SPS. Both are needed later to compute the cropping units.
      let chromaFormatIdc = 1;
      let separateColourPlaneFlag = false;

      // some profiles have more optional data we don't need
      if (H264.PROFILES_WITH_OPTIONAL_SPS_DATA_.includes(profileIdc)) {
        chromaFormatIdc = expGolombDecoder.readUnsignedExpGolomb();
        if (chromaFormatIdc === 3) {
          // separate_colour_plane_flag
          separateColourPlaneFlag = expGolombDecoder.readBoolean();
        }
        // bit_depth_luma_minus8
        expGolombDecoder.skipExpGolomb();
        // bit_depth_chroma_minus8
        expGolombDecoder.skipExpGolomb();
        // qpprime_y_zero_transform_bypass_flag
        expGolombDecoder.skipBits(1);
        // seq_scaling_matrix_present_flag
        if (expGolombDecoder.readBoolean()) {
          const scalingListCount = (chromaFormatIdc !== 3) ? 8 : 12;
          for (let i = 0; i < scalingListCount; i++) {
            // seq_scaling_list_present_flag[ i ]
            if (expGolombDecoder.readBoolean()) {
              // The first six lists are 4x4, the remaining ones are 8x8.
              expGolombDecoder.skipScalingList(i < 6 ? 16 : 64);
            }
          }
        }
      }

      // log2_max_frame_num_minus4
      expGolombDecoder.skipExpGolomb();
      const picOrderCntType = expGolombDecoder.readUnsignedExpGolomb();
      if (picOrderCntType === 0) {
        // log2_max_pic_order_cnt_lsb_minus4
        expGolombDecoder.readUnsignedExpGolomb();
      } else if (picOrderCntType === 1) {
        // delta_pic_order_always_zero_flag
        expGolombDecoder.skipBits(1);
        // offset_for_non_ref_pic
        expGolombDecoder.skipExpGolomb();
        // offset_for_top_to_bottom_field
        expGolombDecoder.skipExpGolomb();
        const numRefFramesInPicOrderCntCycle =
            expGolombDecoder.readUnsignedExpGolomb();
        for (let i = 0; i < numRefFramesInPicOrderCntCycle; i++) {
          // offset_for_ref_frame[ i ]
          expGolombDecoder.skipExpGolomb();
        }
      }

      // max_num_ref_frames
      expGolombDecoder.skipExpGolomb();
      // gaps_in_frame_num_value_allowed_flag
      expGolombDecoder.skipBits(1);

      const picWidthInMbsMinus1 =
          expGolombDecoder.readUnsignedExpGolomb();
      const picHeightInMapUnitsMinus1 =
          expGolombDecoder.readUnsignedExpGolomb();

      const frameMbsOnlyFlag = expGolombDecoder.readBits(1);
      if (frameMbsOnlyFlag === 0) {
        // mb_adaptive_frame_field_flag
        expGolombDecoder.skipBits(1);
      }
      // direct_8x8_inference_flag
      expGolombDecoder.skipBits(1);

      let frameCropLeftOffset = 0;
      let frameCropRightOffset = 0;
      let frameCropTopOffset = 0;
      let frameCropBottomOffset = 0;

      // frame_cropping_flag
      if (expGolombDecoder.readBoolean()) {
        frameCropLeftOffset = expGolombDecoder.readUnsignedExpGolomb();
        frameCropRightOffset = expGolombDecoder.readUnsignedExpGolomb();
        frameCropTopOffset = expGolombDecoder.readUnsignedExpGolomb();
        frameCropBottomOffset = expGolombDecoder.readUnsignedExpGolomb();
      }

      let hSpacing = 1;
      let vSpacing = 1;

      // vui_parameters_present_flag
      if (expGolombDecoder.readBoolean()) {
        // aspect_ratio_info_present_flag
        if (expGolombDecoder.readBoolean()) {
          const aspectRatioIdc = expGolombDecoder.readUnsignedByte();
          // Predefined sample aspect ratios, indexed by aspect_ratio_idc - 1.
          const hSpacingTable = [
            1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2,
          ];
          const vSpacingTable = [
            1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99, 3, 2, 1,
          ];
          if (aspectRatioIdc > 0 && aspectRatioIdc <= 16) {
            hSpacing = hSpacingTable[aspectRatioIdc - 1];
            vSpacing = vSpacingTable[aspectRatioIdc - 1];
          } else if (aspectRatioIdc === 255) {
            // Extended_SAR: explicit sar_width and sar_height.
            const sarWidth = expGolombDecoder.readBits(16);
            const sarHeight = expGolombDecoder.readBits(16);
            // A zero in either field means the aspect ratio is unspecified,
            // so keep the 1:1 default rather than returning a zero spacing.
            if (sarWidth > 0 && sarHeight > 0) {
              hSpacing = sarWidth;
              vSpacing = sarHeight;
            }
          }
        }
      }

      // Frame cropping offsets are expressed in crop units, which depend on
      // the chroma sampling and on whether the stream is coded as frames only
      // (H.264 7.4.2.1.1):
      //   CropUnitX = SubWidthC
      //   CropUnitY = SubHeightC * (2 - frame_mbs_only_flag)
      // where SubWidthC/SubHeightC are 2/2 for 4:2:0, 2/1 for 4:2:2 and 1/1
      // for 4:4:4, and both count as 1 when ChromaArrayType is 0.
      const chromaArrayType = separateColourPlaneFlag ? 0 : chromaFormatIdc;
      const subWidthC =
          (chromaArrayType === 1 || chromaArrayType === 2) ? 2 : 1;
      const subHeightC = (chromaArrayType === 1) ? 2 : 1;
      const cropUnitX = subWidthC;
      const cropUnitY = subHeightC * (2 - frameMbsOnlyFlag);

      const height =
          ((2 - frameMbsOnlyFlag) * (picHeightInMapUnitsMinus1 + 1) * 16) -
          (cropUnitY * (frameCropTopOffset + frameCropBottomOffset));
      const width = ((picWidthInMbsMinus1 + 1) * 16) -
          (cropUnitX * (frameCropLeftOffset + frameCropRightOffset));

      // assemble the SPSs: 16-bit big-endian length followed by the NAL unit
      const spsData = spsNalu.fullData;
      const sps = [
        (spsData.byteLength >>> 8) & 0xff,
        spsData.byteLength & 0xff,
        ...spsData,
      ];

      // assemble the PPSs: 16-bit big-endian length followed by the NAL unit
      const ppsData = ppsNalu.fullData;
      const pps = [
        (ppsData.byteLength >>> 8) & 0xff,
        ppsData.byteLength & 0xff,
        ...ppsData,
      ];

      // sps[0..1] is the length and sps[2] the first byte of the full NAL
      // unit, so the profile, compatibility and level bytes are sps[3..5].
      const videoConfig = new Uint8Array([
        0x01, // version
        sps[3], // profile
        sps[4], // profile compat
        sps[5], // level
        0xfc | 3, // lengthSizeMinusOne, hard-coded to 4 bytes
        0xe0 | 1, // 3bit reserved (111) + numOfSequenceParameterSets
        ...sps,
        1, // numOfPictureParameterSets
        ...pps,
      ]);

      return {
        height,
        width,
        videoConfig,
        hSpacing,
        vSpacing,
      };
    }

    /**
     * Group the NAL units of a list of PES packets into video samples.
     *
     * A new sample is started on every access unit delimiter (AUD). For
     * streams where no AUD has been seen, a new sample is started on every
     * PES packet that carries NAL units, and additionally whenever a keyframe
     * slice follows non-keyframe slice data. Each emitted sample takes its
     * pts/dts from the PES packet that started it, and its |data| is the
     * concatenation of its NAL units, each prefixed by a 4-byte big-endian
     * length. Samples that contain no slice NAL unit are dropped.
     *
     * @param {!Array<shaka.extern.MPEG_PES>} videoData
     * @return {!Array<shaka.extern.VideoSample>}
     */
    static getVideoSamples(videoData) {
      const H264 = shaka.transmuxer.H264;

      /** @type {!Array<shaka.extern.VideoSample>} */
      const videoSamples = [];
      /** @type {?shaka.extern.VideoSample} */
      let lastVideoSample = null;
      /** @type {boolean} */
      let audFound = false;

      // Finalizes the pending sample (if it holds at least one slice) by
      // serializing its NAL units, and appends it to the output.
      const addLastVideoSample = () => {
        if (!lastVideoSample) {
          return;
        }
        if (!lastVideoSample.nalus.length || !lastVideoSample.frame) {
          return;
        }
        const nalusData = [];
        for (const nalu of lastVideoSample.nalus) {
          const size = nalu.fullData.byteLength;
          // 4-byte big-endian length prefix
          const naluLength = new Uint8Array(4);
          naluLength[0] = (size >> 24) & 0xff;
          naluLength[1] = (size >> 16) & 0xff;
          naluLength[2] = (size >> 8) & 0xff;
          naluLength[3] = size & 0xff;
          nalusData.push(naluLength);
          nalusData.push(nalu.fullData);
        }
        lastVideoSample.data =
            shaka.util.Uint8ArrayUtils.concat(...nalusData);
        videoSamples.push(lastVideoSample);
      };

      // Starts a new, empty pending sample timed from the given PES packet.
      const createLastVideoSample = (pes) => {
        lastVideoSample = {
          data: new Uint8Array([]),
          frame: false,
          isKeyframe: false,
          pts: pes.pts,
          dts: pes.dts,
          nalus: [],
        };
      };

      for (const pes of videoData) {
        const nalus = pes.nalus;
        let spsFound = false;

        // If new NAL units found and last sample still there, let's push ...
        // This helps parsing streams with missing AUD
        // (only do this if AUD never found)
        if (lastVideoSample && nalus.length && !audFound) {
          addLastVideoSample();
          createLastVideoSample(pes);
        }

        for (const nalu of nalus) {
          let push = false;
          switch (nalu.type) {
            case H264.NALU_TYPE_NDR_: {
              let isKeyframe = false;
              push = true;
              const data = nalu.data;
              // Only check slice type to detect KF in case SPS found in same
              // packet (any keyframe is preceded by SPS ...)
              if (spsFound && data.length > 4) {
                // retrieve slice type by parsing beginning of NAL unit
                // (follow H264 spec, slice_header definition) to detect
                // keyframe embedded in NDR
                const sliceType =
                    new shaka.util.ExpGolomb(data).readSliceType();
                // 2 : I slice, 4 : SI slice, 7 : I slice, 9: SI slice
                // SI slice : A slice that is coded using intra prediction
                // only and using quantisation of the prediction samples.
                // An SI slice can be coded such that its decoded samples can
                // be constructed identically to an SP slice.
                // I slice: A slice that is not an SI slice that is decoded
                // using intra prediction only.
                if (sliceType === 2 || sliceType === 4 ||
                    sliceType === 7 || sliceType === 9) {
                  isKeyframe = true;
                }
              }
              if (isKeyframe) {
                // If we have non-keyframe data already, that cannot belong to
                // the same frame as a keyframe, so force a push
                if (lastVideoSample &&
                    lastVideoSample.frame && !lastVideoSample.isKeyframe) {
                  addLastVideoSample();
                  lastVideoSample = null;
                }
              }
              if (!lastVideoSample) {
                createLastVideoSample(pes);
              }
              lastVideoSample.frame = true;
              lastVideoSample.isKeyframe = isKeyframe;
              break;
            }
            case H264.NALU_TYPE_IDR_: {
              push = true;
              // Handle PES not starting with AUD
              // If we have frame data already, that cannot belong to the same
              // frame, so force a push
              if (lastVideoSample &&
                  lastVideoSample.frame && !lastVideoSample.isKeyframe) {
                addLastVideoSample();
                lastVideoSample = null;
              }
              if (!lastVideoSample) {
                createLastVideoSample(pes);
              }
              lastVideoSample.frame = true;
              lastVideoSample.isKeyframe = true;
              break;
            }
            case H264.NALU_TYPE_SEI_:
              push = true;
              break;
            case H264.NALU_TYPE_SPS_:
              push = true;
              spsFound = true;
              break;
            case H264.NALU_TYPE_PPS_:
              push = true;
              break;
            case H264.NALU_TYPE_AUD_:
              push = true;
              audFound = true;
              // An AUD closes the pending sample once it holds slice data.
              if (lastVideoSample && lastVideoSample.frame) {
                addLastVideoSample();
                lastVideoSample = null;
              }
              if (!lastVideoSample) {
                createLastVideoSample(pes);
              }
              break;
            case H264.NALU_TYPE_FILLER_DATA_:
              push = true;
              break;
            default:
              // Any other NAL unit type is left out of the sample.
              push = false;
              break;
          }
          if (lastVideoSample && push) {
            lastVideoSample.nalus.push(nalu);
          }
        }
      }

      // If last PES packet, push samples
      addLastVideoSample();

      return videoSamples;
    }
  };
  /**
   * H.264 NALU type value for NDR.
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_NDR_ = 1;

  /**
   * H.264 NALU type value for Instantaneous Decoder Refresh (IDR).
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_IDR_ = 5;

  /**
   * H.264 NALU type value for Supplemental Enhancement Information (SEI).
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_SEI_ = 6;

  /**
   * H.264 NALU type value for a Sequence Parameter Set (SPS).
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_SPS_ = 7;

  /**
   * H.264 NALU type value for a Picture Parameter Set (PPS).
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_PPS_ = 8;

  /**
   * H.264 NALU type value for an Access Unit Delimiter (AUD).
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_AUD_ = 9;

  /**
   * H.264 NALU type value for Filler Data.
   *
   * @const {number}
   * @private
   */
  shaka.transmuxer.H264.NALU_TYPE_FILLER_DATA_ = 12;

  /**
   * The profile_idc values for which the SPS carries additional fields.
   * see Recommendation ITU-T H.264 (4/2013)
   * 7.3.2.1.1 Sequence parameter set data syntax
   *
   * @const {!Array<number>}
   * @private
   */
  shaka.transmuxer.H264.PROFILES_WITH_OPTIONAL_SPS_DATA_ = [
    100,
    110,
    122,
    244,
    44,
    83,
    86,
    118,
    128,
    138,
    139,
    134,
  ];