Skip to content
This repository has been archived by the owner on Mar 12, 2024. It is now read-only.

Commit

Permalink
Convert between annexb, hvc1 and hev1 stream structures (#5)
Browse files Browse the repository at this point in the history
* Add decoder configuration record

* Convert between annexb and hevc elementary streams

* Add tests

* Fix reading of sps reference picture
  • Loading branch information
gBillal authored Sep 13, 2023
1 parent fdce4c8 commit 6e43c4b
Show file tree
Hide file tree
Showing 36 changed files with 1,320 additions and 429 deletions.
546 changes: 397 additions & 149 deletions lib/membrane_h265_plugin/parser.ex

Large diffs are not rendered by default.

62 changes: 28 additions & 34 deletions lib/membrane_h265_plugin/parser/au_splitter.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ defmodule Membrane.H265.Parser.AUSplitter do
nalus_acc: [NALu.t()],
fsm_state: :first | :second,
previous_nalu: NALu.t() | nil,
access_units_to_output: [access_unit_t()]
access_units_to_output: [access_unit()]
}

@enforce_keys [
Expand Down Expand Up @@ -50,7 +50,7 @@ defmodule Membrane.H265.Parser.AUSplitter do
@typedoc """
A type representing an access unit - a list of logically associated NAL units.
"""
@type access_unit_t() :: list(NALu.t())
@type access_unit() :: list(NALu.t())

# split/2 defines a finite state machine with two states: :first and :second.
# The state :first describes the state before reaching the primary coded picture NALu of a given access unit.
Expand All @@ -68,13 +68,25 @@ defmodule Membrane.H265.Parser.AUSplitter do
describes the state after processing the first segment of the coded picture of a given
access unit.
"""
@spec split(list(NALu.t()), t()) :: {list(access_unit_t()), t()}
def split(nalus, state)
@spec split([NALu.t()], boolean(), t()) :: {[access_unit()], t()}
def split(nalus, assume_au_aligned \\ false, state) do
  # Run the state machine over all given NAL units first; it only updates the state.
  splitted = do_split(nalus, state)
  finished_aus = splitted.access_units_to_output

  {candidate_aus, next_state} =
    if assume_au_aligned do
      # With AU-aligned input the NAL units still sitting in the accumulator are
      # emitted as the last access unit and the accumulator is cleared.
      {finished_aus ++ [splitted.nalus_acc],
       %__MODULE__{splitted | access_units_to_output: [], nalus_acc: []}}
    else
      # Otherwise the accumulator is kept in the state for the next call.
      {finished_aus, %__MODULE__{splitted | access_units_to_output: []}}
    end

  # Empty access units are never returned to the caller.
  non_empty_aus = Enum.reject(candidate_aus, &Enum.empty?/1)
  {non_empty_aus, next_state}
end

def split([first_nalu | rest_nalus], %{fsm_state: :first} = state) do
defp do_split([first_nalu | rest_nalus], %{fsm_state: :first} = state) do
cond do
access_unit_first_slice_segment?(first_nalu) ->
split(
do_split(
rest_nalus,
%__MODULE__{
state
Expand All @@ -88,23 +100,23 @@ defmodule Membrane.H265.Parser.AUSplitter do
first_nalu.type in @non_vcl_nalus_at_au_beginning or
NALu.int_type(first_nalu) in 41..44 or
NALu.int_type(first_nalu) in 48..55 ->
split(
do_split(
rest_nalus,
%__MODULE__{state | nalus_acc: state.nalus_acc ++ [first_nalu]}
)

true ->
Logger.warning("AUSplitter: Improper transition")
return(state)
do_split(rest_nalus, state)
end
end

def split([first_nalu | rest_nalus], %{fsm_state: :second} = state) do
defp do_split([first_nalu | rest_nalus], %{fsm_state: :second} = state) do
previous_nalu = state.previous_nalu

cond do
first_nalu.type == :aud or first_nalu.type in @non_vcl_nalus_at_au_beginning ->
split(
do_split(
rest_nalus,
%__MODULE__{
state
Expand All @@ -115,7 +127,7 @@ defmodule Membrane.H265.Parser.AUSplitter do
)

access_unit_first_slice_segment?(first_nalu) ->
split(
do_split(
rest_nalus,
%__MODULE__{
state
Expand All @@ -129,7 +141,7 @@ defmodule Membrane.H265.Parser.AUSplitter do
first_nalu.type in @non_vcl_nalus_at_au_end or
NALu.int_type(first_nalu) in 45..47 or
NALu.int_type(first_nalu) in 56..63 ->
split(
do_split(
rest_nalus,
%__MODULE__{
state
Expand All @@ -140,34 +152,16 @@ defmodule Membrane.H265.Parser.AUSplitter do

true ->
Logger.warning("AUSplitter: Improper transition")
return(state)
do_split(rest_nalus, state)
end
end

def split([], state) do
{state.access_units_to_output |> Enum.filter(&(&1 != [])),
%__MODULE__{state | access_units_to_output: []}}
end

defp return(state) do
{state.access_units_to_output |> Enum.filter(&(&1 != [])),
%__MODULE__{state | access_units_to_output: []}}
end

@doc """
Returns a list of NAL units which are held in the access unit splitter's state accumulator
and empties that accumulator.
These NAL units haven't been proven to form a new access unit, which is why they haven't yet been
output by `Membrane.H265.Parser.AUSplitter.split/2`.
"""
@spec flush(t()) :: {list(NALu.t()), t()}
def flush(state) do
{state.nalus_acc, %{state | nalus_acc: []}}
# Base case: every NAL unit has been consumed, so the state is returned unchanged.
defp do_split([], state), do: state

defp access_unit_first_slice_segment?(nalu) do
nalu.type in @vcl_nalus and
nalu.parsed_fields.first_slice_segment_in_pic_flag == 1
nalu.parsed_fields[:first_slice_segment_in_pic_flag] == 1
end
end
155 changes: 155 additions & 0 deletions lib/membrane_h265_plugin/parser/decoder_configuration_record.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
defmodule Membrane.H265.Parser.DecoderConfigurationRecord do
  @moduledoc """
  Utility functions for parsing and generating HEVC Configuration Record.

  The structure of the record is described in section 8.3.3.1.1 of MPEG-4 part 15 (ISO/IEC 14496-15 Edition 2017-02).
  """

  alias Membrane.H265.Parser
  alias Membrane.H265.Parser.NALu

  # NAL unit types of the parameter sets carried in the record's arrays.
  @vps_nalu_type 32
  @sps_nalu_type 33
  @pps_nalu_type 34

  @enforce_keys [
    :vpss,
    :spss,
    :ppss,
    :profile_space,
    :tier_flag,
    :profile_idc,
    :profile_compatibility_flags,
    :constraint_indicator_flags,
    :level_idc,
    :temporal_id_nested,
    :num_temporal_layers,
    :chroma_format_idc,
    :bit_depth_luma_minus8,
    :bit_depth_chroma_minus8,
    :nalu_length_size
  ]
  defstruct @enforce_keys

  @typedoc "Structure representing the Decoder Configuration Record"
  @type t() :: %__MODULE__{
          vpss: [binary()],
          spss: [binary()],
          ppss: [binary()],
          profile_space: non_neg_integer(),
          tier_flag: non_neg_integer(),
          profile_idc: non_neg_integer(),
          profile_compatibility_flags: non_neg_integer(),
          constraint_indicator_flags: non_neg_integer(),
          level_idc: non_neg_integer(),
          chroma_format_idc: non_neg_integer(),
          bit_depth_luma_minus8: non_neg_integer(),
          bit_depth_chroma_minus8: non_neg_integer(),
          temporal_id_nested: non_neg_integer(),
          num_temporal_layers: non_neg_integer(),
          nalu_length_size: non_neg_integer()
        }

  @doc """
  Generates a DCR based on given PPSs, SPSs and VPSs.

  The general configuration fields are taken from the parsed fields of the last SPS
  in `spss`. Returns `nil` when no SPS is given.

  For the `:hvc1` stream structure the VPS, SPS and PPS arrays are embedded in the
  record; for `:hev1` the record declares zero arrays.
  """
  @spec generate([NALu.t()], [NALu.t()], [NALu.t()], Parser.stream_structure()) :: binary() | nil
  def generate(_vpss, [], _ppss, _stream_structure) do
    nil
  end

  def generate(vpss, spss, ppss, {avc, nalu_length_size}) do
    %NALu{
      parsed_fields: %{
        profile_space: profile_space,
        tier_flag: tier_flag,
        profile_idc: profile_idc,
        profile_compatibility_flag: profile_compatibility_flag,
        progressive_source_flag: progressive_source_flag,
        interlaced_source_flag: interlaced_source_flag,
        non_packed_constraint_flag: non_packed_constraint_flag,
        frame_only_constraint_flag: frame_only_constraint_flag,
        reserved_zero_44bits: reserved_zero_44bits,
        level_idc: level_idc,
        chroma_format_idc: chroma_format_idc,
        bit_depth_luma_minus8: bit_depth_luma_minus8,
        bit_depth_chroma_minus8: bit_depth_chroma_minus8,
        temporal_id_nesting_flag: temporal_id_nested,
        max_sub_layers_minus1: max_sub_layers_minus1
      }
    } = List.last(spss)

    num_temporal_layers = max_sub_layers_minus1 + 1
    length_size_minus_one = nalu_length_size - 1

    # The trailing 24 bits are laid out exactly as `parse/1` reads them:
    # avg_frame_rate (16 bits, written as 0), constant_frame_rate (2 bits, written as 0),
    # num_temporal_layers (3 bits), temporal_id_nested (1 bit), length_size_minus_one (2 bits).
    # num_temporal_layers must occupy 3 bits - a 2-bit field would truncate values above 3.
    common_config =
      <<1, profile_space::2, tier_flag::1, profile_idc::5, profile_compatibility_flag::32,
        progressive_source_flag::1, interlaced_source_flag::1, non_packed_constraint_flag::1,
        frame_only_constraint_flag::1, reserved_zero_44bits::44, level_idc, 0b1111::4, 0::12,
        0b111111::6, 0::2, 0b111111::6, chroma_format_idc::2, 0b11111::5,
        bit_depth_luma_minus8::3, 0b11111::5, bit_depth_chroma_minus8::3, 0::16, 0::2,
        num_temporal_layers::3, temporal_id_nested::1, length_size_minus_one::2-integer>>

    cond do
      avc == :hvc1 ->
        <<common_config::binary, 3::8, encode_parameter_sets(vpss, @vps_nalu_type)::binary,
          encode_parameter_sets(spss, @sps_nalu_type)::binary,
          encode_parameter_sets(ppss, @pps_nalu_type)::binary>>

      avc == :hev1 ->
        <<common_config::binary, 0::8>>
    end
  end

  # Encodes a single array of parameter sets: a header with the NAL unit type and the
  # number of NAL units, followed by every payload prefixed with its 16-bit size.
  defp encode_parameter_sets(pss, nalu_type) do
    # 2::2 is 0b10: the first bit (array_completeness in ISO/IEC 14496-15) is set,
    # the following reserved bit is zero.
    <<2::2, nalu_type::6, length(pss)::16>> <>
      Enum.map_join(pss, &<<byte_size(&1.payload)::16-integer, &1.payload::binary>>)
  end

  @doc """
  Parses the DCR.

  Returns the parsed record, or `{:error, :unknown_pattern}` when the binary doesn't
  match the expected layout of the fixed-size part of the record.

  All arrays declared by the record are read; the payloads of VPS, SPS and PPS arrays
  are returned, while arrays of any other NAL unit type are ignored.
  """
  @spec parse(binary()) :: t() | {:error, :unknown_pattern}
  def parse(
        <<1::8, profile_space::2, tier_flag::1, profile_idc::5, profile_compatibility_flags::32,
          constraint_indicator_flags::48, level_idc::8, 0b1111::4,
          _min_spatial_segmentation_idc::12, 0b111111::6, _parallelism_type::2, 0b111111::6,
          chroma_format_idc::2, 0b11111::5, bit_depth_luma_minus8::3, 0b11111::5,
          bit_depth_chroma_minus8::3, _avg_frame_rate::16, _constant_frame_rate::2,
          num_temporal_layers::3, temporal_id_nested::1, length_size_minus_one::2-integer,
          num_of_arrays::8, rest::bitstring>>
      ) do
    parameter_sets = parse_arrays(num_of_arrays, rest, %{})

    %__MODULE__{
      vpss: Map.get(parameter_sets, @vps_nalu_type, []),
      spss: Map.get(parameter_sets, @sps_nalu_type, []),
      ppss: Map.get(parameter_sets, @pps_nalu_type, []),
      profile_space: profile_space,
      tier_flag: tier_flag,
      profile_idc: profile_idc,
      profile_compatibility_flags: profile_compatibility_flags,
      constraint_indicator_flags: constraint_indicator_flags,
      level_idc: level_idc,
      temporal_id_nested: temporal_id_nested,
      num_temporal_layers: num_temporal_layers,
      chroma_format_idc: chroma_format_idc,
      bit_depth_luma_minus8: bit_depth_luma_minus8,
      bit_depth_chroma_minus8: bit_depth_chroma_minus8,
      nalu_length_size: length_size_minus_one + 1
    }
  end

  def parse(_data), do: {:error, :unknown_pattern}

  # Reads `remaining` arrays and groups their NAL unit payloads by NAL unit type, so the
  # result doesn't depend on the order of the arrays or on which arrays are present.
  defp parse_arrays(0, _rest, acc), do: acc

  defp parse_arrays(
         remaining,
         <<_array_completeness::1, _reserved::1, type::6, num_of_nalus::16, rest::bitstring>>,
         acc
       ) do
    {nalus, rest} = do_parse_array(num_of_nalus, rest)
    parse_arrays(remaining - 1, rest, Map.update(acc, type, nalus, &(&1 ++ nalus)))
  end

  # Reads `amount` size-prefixed payloads and returns them in stream order together
  # with the unread remainder.
  defp do_parse_array(amount, rest, acc \\ [])
  defp do_parse_array(0, rest, acc), do: {Enum.reverse(acc), rest}

  defp do_parse_array(remaining, <<size::16, data::binary-size(size), rest::bitstring>>, acc),
    do: do_parse_array(remaining - 1, rest, [data | acc])
end
7 changes: 4 additions & 3 deletions lib/membrane_h265_plugin/parser/format.ex
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ defmodule Membrane.H265.Parser.Format do
"""
@spec from_sps(
sps_nalu :: H265.Parser.NALu.t(),
output_raw_stream_structure :: H264.stream_structure(),
options_fields :: [
framerate: {pos_integer(), pos_integer()},
output_alignment: :au | :nalu
]
) :: H265.t()
def from_sps(sps_nalu, options_fields) do
def from_sps(sps_nalu, output_raw_stream_format, options_fields) do
sps = sps_nalu.parsed_fields

{sub_width_c, sub_height_c} =
Expand All @@ -34,7 +35,6 @@ defmodule Membrane.H265.Parser.Format do
1 -> {2, 2}
2 -> {2, 1}
3 -> {1, 1}
_other -> {nil, nil}
end

{width, height} =
Expand All @@ -55,7 +55,8 @@ defmodule Membrane.H265.Parser.Format do
profile: profile,
framerate: Keyword.get(options_fields, :framerate),
alignment: Keyword.get(options_fields, :output_alignment),
nalu_in_metadata?: true
nalu_in_metadata?: true,
stream_structure: output_raw_stream_format
}
end

Expand Down
11 changes: 7 additions & 4 deletions lib/membrane_h265_plugin/parser/nalu.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ defmodule Membrane.H265.Parser.NALu do
"""
@type t :: %__MODULE__{
parsed_fields: %{atom() => any()},
prefix_length: pos_integer(),
stripped_prefix: binary(),
type: Membrane.H265.Parser.NALuTypes.nalu_type(),
payload: binary(),
status: :valid | :error
status: :valid | :error,
timestamps: timestamps()
}

@enforce_keys [:parsed_fields, :prefix_length, :type, :payload, :status]
defstruct @enforce_keys
@type timestamps :: {pts :: integer() | nil, dts :: integer() | nil}

@enforce_keys [:parsed_fields, :stripped_prefix, :type, :payload, :status]
defstruct @enforce_keys ++ [timestamps: {nil, nil}]

@doc """
Returns the value stored under `:nal_unit_type` in the NAL unit's parsed fields.
"""
@spec int_type(t()) :: non_neg_integer()
def int_type(%__MODULE__{parsed_fields: fields}) do
  fields.nal_unit_type
end
Expand Down
Loading

0 comments on commit 6e43c4b

Please sign in to comment.