Source: WebRTC Video Receiver (5) - Setting Reference Frames

1) Introduction

void RtpVideoStreamReceiver2::OnAssembledFrame(
    std::unique_ptr<video_coding::RtpFrameObject> frame) {
  RTC_DCHECK_RUN_ON(&worker_task_checker_);
  RTC_DCHECK(frame);
  .....
  // This module is disabled by default and is a new feature worth studying; as the name
  // suggests, it is the packet-loss notification controller.
  // It can be enabled via the WebRTC-RtcpLossNotification/Enable field trial, but by default
  // it only supports VP8. The SDP needs to negotiate the goog-lntf feedback.
  if (loss_notification_controller_ && descriptor) {
    loss_notification_controller_->OnAssembledFrame(
        frame->first_seq_num(), descriptor->frame_id,
        absl::c_linear_search(descriptor->decode_target_indications,
                              DecodeTargetIndication::kDiscardable),
        descriptor->dependencies);
  }         
  // If frames arrive before a key frame, they would not be decodable.
  // In that case, request a key frame ASAP.
  if (!has_received_frame_) {
    if (frame->FrameType() != VideoFrameType::kVideoFrameKey) {
      // |loss_notification_controller_|, if present, would have already
      // requested a key frame when the first packet for the non-key frame
      // had arrived, so no need to replicate the request.
      if (!loss_notification_controller_) {
        RequestKeyFrame();
      }
    }
    has_received_frame_ = true;
  }
  // Reset |reference_finder_| if |frame| is new and the codec have changed.
  if (current_codec_) {
    // Each frame has a distinct RTP timestamp; the current frame's timestamp is greater than
    // the previous frame's (when no wraparound has occurred).
    bool frame_is_newer =
        AheadOf(frame->Timestamp(), last_assembled_frame_rtp_timestamp_);
    if (frame->codec_type() != current_codec_) {
      if (frame_is_newer) {
        // When we reset the |reference_finder_| we don't want new picture ids
        // to overlap with old picture ids. To ensure that doesn't happen we
        // start from the |last_completed_picture_id_| and add an offset in case
        // of reordering.
        reference_finder_ =
            std::make_unique<video_coding::RtpFrameReferenceFinder>(
                this, last_completed_picture_id_ +
                          std::numeric_limits<uint16_t>::max());
        current_codec_ = frame->codec_type();
      } else {
        // Old frame from before the codec switch, discard it.
        return;
      }
    }
    if (frame_is_newer) {
      last_assembled_frame_rtp_timestamp_ = frame->Timestamp();
    }
  } else {
    current_codec_ = frame->codec_type();
    last_assembled_frame_rtp_timestamp_ = frame->Timestamp();
  }
  if (buffered_frame_decryptor_ != nullptr) {
    buffered_frame_decryptor_->ManageEncryptedFrame(std::move(frame));
  } else if (frame_transformer_delegate_) {
    frame_transformer_delegate_->TransformFrame(std::move(frame));
  } else {
    reference_finder_->ManageFrame(std::move(frame));
  }
}
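
A note on the `AheadOf` comparison used above: it is wraparound-aware, so a frame whose RTP timestamp has wrapped past 2^32 is still recognized as newer. The following standalone sketch is a simplified stand-in for WebRTC's `AheadOf` helper, not the actual implementation, but it illustrates the idea:

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-in for a wraparound-aware comparison: |a| is "ahead of" |b| when the
// forward distance b -> a (mod 2^32) is smaller than half the wrap interval.
bool AheadOfU32(uint32_t a, uint32_t b) {
  return a != b && static_cast<uint32_t>(a - b) < 0x80000000u;
}

int main() {
  uint32_t prev = 0xFFFFFF00u;  // RTP timestamp shortly before the wrap.
  uint32_t next = 0x00000100u;  // Timestamp of the next frame, after the wrap.
  std::cout << (next > prev) << "\n";           // 0: a naive comparison gets it wrong.
  std::cout << AheadOfU32(next, prev) << "\n";  // 1: |next| is newer despite the wrap.
}
```

The same modular-arithmetic idea is used for 16-bit sequence numbers throughout the reference finder.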

2) The ManageFrame workflow

void RtpFrameReferenceFinder::ManageFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  // If we have cleared past this frame, drop it.
  if (cleared_to_seq_num_ != -1 &&
      AheadOf<uint16_t>(cleared_to_seq_num_, frame->first_seq_num())) {
    return;
  }
  FrameDecision decision = ManageFrameInternal(frame.get());
  switch (decision) {
    case kStash:
      if (stashed_frames_.size() > kMaxStashedFrames)
        stashed_frames_.pop_back();
      stashed_frames_.push_front(std::move(frame));
      break;
    case kHandOff:
      HandOffFrame(std::move(frame));
      RetryStashedFrames();
      break;
    case kDrop:
      break;
  }
}
RtpFrameReferenceFinder::FrameDecision
RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
  ........
  switch (frame->codec_type()) {
    case kVideoCodecVP8:
      return ManageFrameVp8(frame);
    case kVideoCodecVP9:
      return ManageFrameVp9(frame);
    case kVideoCodecGeneric:
      if (auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
              &frame->GetRtpVideoHeader().video_type_header)) {
        return ManageFramePidOrSeqNum(frame, generic_header->picture_id);
      }
      ABSL_FALLTHROUGH_INTENDED;
    default:
      return ManageFramePidOrSeqNum(frame, kNoPictureId);
  }
}

3) ManageFramePidOrSeqNum: setting reference frames

RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameH264(
    RtpFrameObject* frame) {
  const FrameMarking& rtp_frame_marking = frame->GetFrameMarking();
  uint8_t tid = rtp_frame_marking.temporal_id;
  bool blSync = rtp_frame_marking.base_layer_sync;
  /* With Android hardware encoding the received tid is 0xff, and kNoPictureId (-1) is passed
     in; this is an H.264-specific behavior. */
  if (tid == kNoTemporalIdx)
    return ManageFramePidOrSeqNum(std::move(frame), kNoPictureId);
  ....  
}

RtpFrameReferenceFinder::FrameDecision
RtpFrameReferenceFinder::ManageFramePidOrSeqNum(RtpFrameObject* frame,
                                                int picture_id) {
  // If |picture_id| is specified then we use that to set the frame references,
  // otherwise we use sequence number.
  // 1) For non-H.264 frames, the picture id keeps frames within a GOP continuous.
  if (picture_id != kNoPictureId) {
    frame->id.picture_id = unwrapper_.Unwrap(picture_id);
    frame->num_references =
        frame->frame_type() == VideoFrameType::kVideoFrameKey ? 0 : 1;
    frame->references[0] = frame->id.picture_id - 1;
    return kHandOff;
  }
  //2) Check whether this is a keyframe; frame_type was set during frame assembly.
  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
    last_seq_num_gop_.insert(std::make_pair(
        frame->last_seq_num(),  // The last packet seq of this keyframe is the map key for the GOP.
        std::make_pair(frame->last_seq_num(), frame->last_seq_num())));
  }
  //3) If no keyframe has been received so far, stash this frame.
  // We have received a frame but not yet a keyframe, stash this frame.
  if (last_seq_num_gop_.empty()) 
    return kStash;
  // Clean up info for old keyframes but make sure to keep info
  // for the last keyframe.
  // 4) Clean up old GOPs: erase everything before frame->last_seq_num() - 100, but keep at least one entry.
  auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
  for (auto it = last_seq_num_gop_.begin();
        it != clean_to && last_seq_num_gop_.size() > 1;) {
    it = last_seq_num_gop_.erase(it);
  }
  // Find the last sequence number of the last frame for the keyframe
  // that this frame indirectly references.
  // If execution reaches this point, the GOP map is guaranteed to be non-empty.
  //5.1) If the keyframe's seq is ahead of this frame's seq (no wraparound), this frame must be
  //     dropped, e.g. last_seq_num_gop_ holds packet 34 while the incoming delta frame spans
  //     packets 10~16.
  //5.2) Another case: the current frame is itself the keyframe with frame->last_seq_num()=34
  //     while last_seq_num_gop_ already holds seq 56. Given the ordering rule of
  //     last_seq_num_gop_, packet 34 is inserted at the head, so the condition below still holds.
  auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
  if (seq_num_it == last_seq_num_gop_.begin()) {
    RTC_LOG(LS_WARNING) << "Generic frame with packet range ["
                        << frame->first_seq_num() << ", "
                        << frame->last_seq_num()
                        << "] has no GoP, dropping frame.";
    return kDrop;
  }
  //If the condition above does not hold, upper_bound points past the matching GOP entry.
  //If the current frame is a keyframe, seq_num_it is last_seq_num_gop_.end(); after the --
  //operation it points to the most recent keyframe entry.
  seq_num_it--;
  // Make sure the packet sequence numbers are continuous, otherwise stash
  // this frame.
  // 6) This step checks the continuity between this frame and the previous frame.
  // last_picture_id_gop is the last packet seq of the previous frame (the forward reference)
  // tracked for the current GOP.
  uint16_t last_picture_id_gop = seq_num_it->second.first;
  // last_picture_id_with_padding_gop is likewise the last packet seq of the previous frame:
  // the newest packet seq in the current GOP, which may equal last_picture_id_gop or may be a
  // padding packet.
  uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second;
  // For delta frames, check seq continuity.
  if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
    //The last packet seq of the previous frame is the current frame's first packet seq - 1.
    uint16_t prev_seq_num = frame->first_seq_num() - 1;
    // If they differ, the stream is not continuous; without packet loss they must be equal.
    if (prev_seq_num != last_picture_id_with_padding_gop)
      return kStash;
  }
  //Check that the current frame's last seq is at or ahead of the GOP keyframe's last seq.
  RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
  // Since keyframes can cause reordering we can't simply assign the
  // picture id according to some incrementing counter.
  //7) Assign the RtpFrameObject's id.picture_id.
  // num_references is 0 (false) for a keyframe and 1 (true) for a delta frame.
  frame->id.picture_id = frame->last_seq_num();
  frame->num_references =
      frame->frame_type() == VideoFrameType::kVideoFrameDelta;
  //Last packet seq of the previous frame.
  frame->references[0] = rtp_seq_num_unwrapper_.Unwrap(last_picture_id_gop);
  //This step is what makes the step-6 check work for subsequent frames: last_picture_id_gop is
  //the last packet seq recorded for this GOP, while frame->id.picture_id is the current frame's
  //last packet seq, so under normal conditions AheadOf returns true.
  if (AheadOf<uint16_t>(frame->id.picture_id, last_picture_id_gop)) {
    //Update the GOP keyframe's entry in last_seq_num_gop_, assigning the current frame's last
    //packet seq to both fields; it is this update that lets step 6 pass for the next frame.
    seq_num_it->second.first = frame->id.picture_id;
    seq_num_it->second.second = frame->id.picture_id;
  }
  last_picture_id_ = frame->id.picture_id;
  //Update padding-packet state.
  UpdateLastPictureIdWithPadding(frame->id.picture_id);
  frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
  return kHandOff;
}
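
To make steps 2) through 7) concrete, here is a small self-contained sketch (illustrative only, not WebRTC code) of how `last_seq_num_gop_` evolves: the keyframe inserts an entry keyed by its last packet seq, and each continuous delta frame advances that entry, which is exactly what lets the next delta frame pass the continuity check in step 6.

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <utility>

int main() {
  // Key: last packet seq of the GOP's keyframe.
  // Value: {last seq of the latest frame, last seq including padding}.
  std::map<uint16_t, std::pair<uint16_t, uint16_t>> last_seq_num_gop;

  // A keyframe spanning packets 30..34 arrives.
  last_seq_num_gop.insert({34, {34, 34}});

  // Delta frame spanning packets 35..36: first_seq - 1 == 34 matches the GOP entry,
  // so it is continuous and the entry is advanced to 36.
  auto it = --last_seq_num_gop.upper_bound(36);
  std::cout << "continuous: " << (uint16_t(35 - 1) == it->second.second) << "\n";  // 1
  it->second = {36, 36};

  // Delta frame spanning packets 39..40 arrives but 37..38 were lost:
  // first_seq - 1 == 38 != 36, so ManageFramePidOrSeqNum would return kStash.
  std::cout << "continuous: " << (uint16_t(39 - 1) == it->second.second) << "\n";  // 0
}
```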

4) UpdateLastPictureIdWithPadding: updating padding-packet state

void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
  //Find the first GOP entry whose key is greater than seq_num.
  auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num);
  // If this padding packet "belongs" to a group of pictures that we don't track
  // anymore, do nothing.
  if (gop_seq_num_it == last_seq_num_gop_.begin())
    return;
  --gop_seq_num_it;
  // Calculate the next contiuous sequence number and search for it in
  // the padding packets we have stashed.
  uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1;
  auto padding_seq_num_it =
      stashed_padding_.lower_bound(next_seq_num_with_padding);
  // While there still are padding packets and those padding packets are
  // continuous, then advance the "last-picture-id-with-padding" and remove
  // the stashed padding packet.
  while (padding_seq_num_it != stashed_padding_.end() &&
          *padding_seq_num_it == next_seq_num_with_padding) {
    gop_seq_num_it->second.second = next_seq_num_with_padding;
    ++next_seq_num_with_padding;
    padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
  }
  // In the case where the stream has been continuous without any new keyframes
  // for a while there is a risk that new frames will appear to be older than
  // the keyframe they belong to due to wrapping sequence number. In order
  // to prevent this we advance the picture id of the keyframe every so often.
  if (ForwardDiff(gop_seq_num_it->first, seq_num) > 10000) {
    auto save = gop_seq_num_it->second;
    last_seq_num_gop_.clear();
    last_seq_num_gop_[seq_num] = save;
  }
}

5) ManageFrame dispatch handling

void RtpFrameReferenceFinder::ManageFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  .....
  FrameDecision decision = ManageFrameInternal(frame.get());
  switch (decision) {
    case kStash:
      if (stashed_frames_.size() > kMaxStashedFrames)  // at most 100
        stashed_frames_.pop_back();
      stashed_frames_.push_front(std::move(frame));
      break;
    case kHandOff:
      HandOffFrame(std::move(frame));
      RetryStashedFrames();
      break;
    case kDrop:
      break;
  }
}
void RtpFrameReferenceFinder::HandOffFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  //picture_id_offset_ is 0 here.
  frame->id.picture_id += picture_id_offset_;
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] += picture_id_offset_;
  }
  frame_callback_->OnCompleteFrame(std::move(frame));
}
void RtpFrameReferenceFinder::RetryStashedFrames() {
  bool complete_frame = false;
  do {
    complete_frame = false;
    for (auto frame_it = stashed_frames_.begin();
          frame_it != stashed_frames_.end();) {
      FrameDecision decision = ManageFrameInternal(frame_it->get());
      switch (decision) {
        case kStash:
          ++frame_it;
          break;
        case kHandOff:
          complete_frame = true;
          HandOffFrame(std::move(*frame_it));
          RTC_FALLTHROUGH();
        case kDrop:
          frame_it = stashed_frames_.erase(frame_it);
      }
    }
  } while (complete_frame);
}
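
The interplay of kStash and RetryStashedFrames is easiest to see on a timeline: a delta frame that arrives before any keyframe is stashed, the keyframe is handed off as soon as it arrives, and the retry loop then releases the stashed delta because a GOP entry now exists. A compact illustrative sketch of that control flow (not the real ManageFrameInternal):

```cpp
#include <deque>
#include <iostream>
#include <string>

enum class Decision { kStash, kHandOff, kDrop };

struct Frame { std::string name; bool keyframe; };

// Illustrative stand-in: a delta frame can only be handed off once a keyframe was seen.
Decision Manage(const Frame& f, bool& has_gop) {
  if (f.keyframe) { has_gop = true; return Decision::kHandOff; }
  return has_gop ? Decision::kHandOff : Decision::kStash;
}

int main() {
  bool has_gop = false;
  std::deque<Frame> stashed;

  Frame delta{"delta#2", false};
  if (Manage(delta, has_gop) == Decision::kStash) stashed.push_front(delta);  // stashed

  Frame key{"key#1", true};
  if (Manage(key, has_gop) == Decision::kHandOff)
    std::cout << "hand off " << key.name << "\n";

  // RetryStashedFrames: re-evaluate stashed frames now that the GOP exists.
  for (auto it = stashed.begin(); it != stashed.end();) {
    if (Manage(*it, has_gop) == Decision::kHandOff) {
      std::cout << "hand off " << it->name << "\n";
      it = stashed.erase(it);
    } else {
      ++it;
    }
  }
}
```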

6) Summary


Source: WebRTC Video Receiver (6) - How FrameBuffer Works

1) Introduction

void RtpFrameReferenceFinder::HandOffFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  //picture_id_offset_ is 0 here.
  frame->id.picture_id += picture_id_offset_;
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] += picture_id_offset_;
  }
  frame_callback_->OnCompleteFrame(std::move(frame));
}

void RtpVideoStreamReceiver2::OnCompleteFrame(
    std::unique_ptr<video_coding::EncodedFrame> frame) {
  RTC_DCHECK_RUN_ON(&worker_task_checker_);
  video_coding::RtpFrameObject* rtp_frame =
      static_cast<video_coding::RtpFrameObject*>(frame.get());
  //As shown above, picture_id is the seq number of the current frame's last packet.
  last_seq_num_for_pic_id_[rtp_frame->id.picture_id] =
      rtp_frame->last_seq_num();
  last_completed_picture_id_ =
      std::max(last_completed_picture_id_, frame->id.picture_id);
  complete_frame_callback_->OnCompleteFrame(std::move(frame));
}

void VideoReceiveStream2::OnCompleteFrame(
    std::unique_ptr<video_coding::EncodedFrame> frame) {
  RTC_DCHECK_RUN_ON(&worker_sequence_checker_);
  // TODO(https://bugs.webrtc.org/9974): Consider removing this workaround.
  /* If more than 10 minutes have passed since the last complete frame, clear the frame buffer. */
  int64_t time_now_ms = clock_->TimeInMilliseconds();
  if (last_complete_frame_time_ms_ > 0 &&//10 minutes.
      time_now_ms - last_complete_frame_time_ms_ > kInactiveStreamThresholdMs) {
    frame_buffer_->Clear();
  }
  last_complete_frame_time_ms_ = time_now_ms;
  //Get the playout delay from the RTP header extension; the default is {-1,-1}. What is this value used for?
  const PlayoutDelay& playout_delay = frame->EncodedImage().playout_delay_;
  if (playout_delay.min_ms >= 0) {
    frame_minimum_playout_delay_ms_ = playout_delay.min_ms;
    UpdatePlayoutDelays();
  }
  if (playout_delay.max_ms >= 0) {
    frame_maximum_playout_delay_ms_ = playout_delay.max_ms;
    UpdatePlayoutDelays();
  }
  int64_t last_continuous_pid = frame_buffer_->InsertFrame(std::move(frame));
  if (last_continuous_pid != -1)
    rtp_video_stream_receiver_.FrameContinuous(last_continuous_pid);
}

2) How InsertFrame works

int64_t FrameBuffer::InsertFrame(std::unique_ptr<EncodedFrame> frame) {
  TRACE_EVENT0("webrtc", "FrameBuffer::InsertFrame");
  RTC_DCHECK(frame);
  rtc::CritScope lock(&crit_);
  const VideoLayerFrameId& id = frame->id;
  //Get the picture id of the last continuous frame.
  int64_t last_continuous_picture_id =
      !last_continuous_frame_ ? -1 : last_continuous_frame_->picture_id;
  //1) Compare against the forward reference frame, e.g. the reference's seq vs. the current frame's seq.
  if (!ValidReferences(*frame)) {
    RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                        << id.picture_id << ":"
                        << static_cast<int>(id.spatial_layer)
                        << ") has invalid frame references, dropping frame.";
    //Normally the forward reference's seq is smaller than the current frame's; if this frame's
    //seq is smaller than its reference's, drop it.
    return last_continuous_picture_id;
  }
  //At most 800 frames; if the buffer is full the current frame is dropped unless it is a keyframe.
  if (frames_.size() >= kMaxFramesBuffered) {
    //If it is a keyframe, clear the history kept in decoded_frames_history_ (covered later)
    //and also the frames_ container maintained by FrameBuffer, which caches all frames
    //awaiting decoding.
    if (frame->is_keyframe()) {
      RTC_LOG(LS_WARNING) << "Inserting keyframe (picture_id:spatial_id) ("
                          << id.picture_id << ":"
                          << static_cast<int>(id.spatial_layer)
                          << ") but buffer is full, clearing"
                              " buffer and inserting the frame.";
      ClearFramesAndHistory();
    } else {
      RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                          << id.picture_id << ":"
                          << static_cast<int>(id.spatial_layer)
                          << ") could not be inserted due to the frame "
                              "buffer being full, dropping frame.";
      // Delta frame with a full buffer: just return the last continuous frame's picture id.
      return last_continuous_picture_id;
    }
  }
  //Get the picture_id of the most recent frame handed to the decode queue; for H.264 this is
  //the seq of the frame's last packet.
  auto last_decoded_frame = decoded_frames_history_.GetLastDecodedFrameId();
  //Get the timestamp of the most recent frame handed to the decode queue; every frame has a
  //distinct timestamp.
  auto last_decoded_frame_timestamp =
      decoded_frames_history_.GetLastDecodedFrameTimestamp();
  //If the current frame's last packet seq (its picture_id) <= the last decoded frame's
  //picture_id, this may be reordering or a sequence-number wraparound.
  if (last_decoded_frame && id <= *last_decoded_frame) {
    //If the current frame's timestamp is newer than that of the last frame sent for decoding,
    //the encoder may have been reset or the sequence number wrapped; in that case decoding can
    //still continue from this frame if it is a keyframe.
    if (AheadOf(frame->Timestamp(), *last_decoded_frame_timestamp) &&
        frame->is_keyframe()) {
      // If this frame has a newer timestamp but an earlier picture id then we
      // assume there has been a jump in the picture id due to some encoder
      // reconfiguration or some other reason. Even though this is not according
      // to spec we can still continue to decode from this frame if it is a
      // keyframe.
      RTC_LOG(LS_WARNING)
          << "A jump in picture id was detected, clearing buffer.";
      //First clear all cached frames and the history, because either the encoder was reset or
      //a picture-id jump occurred.
      ClearFramesAndHistory();
      last_continuous_picture_id = -1;
    } else {
      // If it is reordering and the frame is not a keyframe, drop it.
      RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                          << id.picture_id << ":"
                          << static_cast<int>(id.spatial_layer)
                          << ") inserted after frame ("
                          << last_decoded_frame->picture_id << ":"
                          << static_cast<int>(last_decoded_frame->spatial_layer)
                          << ") was handed off for decoding, dropping frame.";
      return last_continuous_picture_id;
    }
  }
  // Test if inserting this frame would cause the order of the frames to become
  // ambiguous (covering more than half the interval of 2^16). This can happen
  // when the picture id make large jumps mid stream.
  // If the picture-id jump is large, clear the old cache and start decoding from this frame.
  if (!frames_.empty() && id < frames_.begin()->first &&
      frames_.rbegin()->first < id) {
    RTC_LOG(LS_WARNING)
        << "A jump in picture id was detected, clearing buffer.";
    ClearFramesAndHistory();
    last_continuous_picture_id = -1;
  }
  auto info = frames_.emplace(id, FrameInfo()).first;
  //The frames_ container already holds this id, so this is a duplicate insertion; return the
  //last continuous frame's id.
  if (info->second.frame) {
    return last_continuous_picture_id;
  }
  //Update the frame info: record how many of this frame's references are not yet continuous,
  //and register the reverse referenced-frame -> dependent-frame relationship so that, when a
  //referenced frame becomes available, the dependents' missing-reference counters (continuity
  //and decodability, 0 meaning continuous) can be updated; this feeds the later update of
  //last_continuous_frame_.
  if (!UpdateFrameInfoWithIncomingFrame(*frame, info))
    return last_continuous_picture_id;
    //If the frame was not delayed by retransmission it can be used for delay estimation;
    //timing_ computes many delay metrics as well as the frame's expected render time.
  if (!frame->delayed_by_retransmission())
    timing_->IncomingTimestamp(frame->Timestamp(), frame->ReceivedTime());
  if (stats_callback_ && IsCompleteSuperFrame(*frame)) {
    stats_callback_->OnCompleteFrame(frame->is_keyframe(), frame->size(),
                                      frame->contentType());
  }
  //Store the current frame in the cache.
  info->second.frame = std::move(frame);
  // If the number of not-yet-continuous references is 0, the current frame is continuous,
  // e.g. a keyframe, or a P frame whose referenced previous P frame has already arrived.
  // Understanding this block requires the UpdateFrameInfoWithIncomingFrame analysis below.
  if (info->second.num_missing_continuous == 0) {
    info->second.continuous = true;
    //Propagate continuity; analyzed later.
    PropagateContinuity(info);  // For this insertion the function normally returns without doing anything further.
    last_continuous_picture_id = last_continuous_frame_->picture_id;
    // Since we now have new continuous frames there might be a better frame
    // to return from NextFrame.
    if (callback_queue_) {
      callback_queue_->PostTask([this] {
        rtc::CritScope lock(&crit_);
        if (!callback_task_.Running())
          return;
        RTC_CHECK(frame_handler_);
        callback_task_.Stop();
        //Trigger the decode task: find a frame to decode and post it to the decode task queue (analyzed later).
        StartWaitForNextFrameOnQueue();
      });
    }
  }
  //In the end this returns the current frame's picture_id.
  return last_continuous_picture_id;
}

2.1) UpdateFrameInfoWithIncomingFrame: updating reference-frame info

//The parameter |info| is the iterator to the current frame's position in the frames_ container.
bool FrameBuffer::UpdateFrameInfoWithIncomingFrame(const EncodedFrame& frame,
                                                    FrameMap::iterator info) {
  TRACE_EVENT0("webrtc", "FrameBuffer::UpdateFrameInfoWithIncomingFrame");
  const VideoLayerFrameId& id = frame.id;//VideoLayerFrameId
  auto last_decoded_frame = decoded_frames_history_.GetLastDecodedFrameId();
  RTC_DCHECK(!last_decoded_frame || *last_decoded_frame < info->first);
  struct Dependency {
    VideoLayerFrameId id;
    bool continuous;
  };
  //Dependencies that are not yet fulfilled.
  std::vector<Dependency> not_yet_fulfilled_dependencies;
  // Find all dependencies that have not yet been fulfilled.
  // Iterate over the frame's references; num_references was set in the reference-finder
  // module. For H.264, a non-key frame has num_references = 1.
  for (size_t i = 0; i < frame.num_references; ++i) {
    //Construct a temporary reference-frame id.
    VideoLayerFrameId ref_key(frame.references[i], frame.id.spatial_layer);
    // Does |frame| depend on a frame earlier than the last decoded one?
    // If a reference's id is less than or equal to the latest decoded frame's, this may be
    // reordering; normally the reference has either already been decoded (equal or earlier)
    // or is still pending (later).
    if (last_decoded_frame && ref_key <= *last_decoded_frame) {
      // Was that frame decoded? If not, this |frame| will never become
      // decodable.
      // If that reference was never decoded (reordering), it will never get another chance to
      // be decoded, so this frame can never be decoded either: return false. If the reference
      // was decoded, this is the normal case.
      if (!decoded_frames_history_.WasDecoded(ref_key)) {
        int64_t now_ms = clock_->TimeInMilliseconds();
        if (last_log_non_decoded_ms_ + kLogNonDecodedIntervalMs < now_ms) {
          RTC_LOG(LS_WARNING)
              << "Frame with (picture_id:spatial_id) (" << id.picture_id << ":"
              << static_cast<int>(id.spatial_layer)
              << ") depends on a non-decoded frame more previous than"
                  " the last decoded frame, dropping frame.";
          last_log_non_decoded_ms_ = now_ms;
        }
        return false;
      }
    } else {  // The reference's id is newer than the last decoded frame: it may not be
              // continuous yet and has not been sent to the decode queue.
      // Look it up in the cache.
      auto ref_info = frames_.find(ref_key);
      //ref_info != frames_.end() means the reference is still in the cache; here we check
      //whether that reference is continuous: it is continuous only if both
      //ref_info != frames_.end() and ref_info->second.continuous hold.
      bool ref_continuous =
          ref_info != frames_.end() && ref_info->second.continuous;
      // Whether continuous or not, the reference is pushed into the temporary not_yet_fulfilled_dependencies container.
      not_yet_fulfilled_dependencies.push_back({ref_key, ref_continuous});
    }
  }// end for loop
  // Does |frame| depend on the lower spatial layer?
  if (frame.inter_layer_predicted) {
    VideoLayerFrameId ref_key(frame.id.picture_id, frame.id.spatial_layer - 1);
    auto ref_info = frames_.find(ref_key);
    bool lower_layer_decoded =
        last_decoded_frame && *last_decoded_frame == ref_key;
    bool lower_layer_continuous =
        lower_layer_decoded ||
        (ref_info != frames_.end() && ref_info->second.continuous);
    if (!lower_layer_continuous || !lower_layer_decoded) {
      not_yet_fulfilled_dependencies.push_back(
          {ref_key, lower_layer_continuous});
    }
  }
  //Counter of not-yet-continuous references, initialized to the size of not_yet_fulfilled_dependencies.
  info->second.num_missing_continuous = not_yet_fulfilled_dependencies.size();
  //Counter of not-yet-decodable references, i.e. references not yet sent to the decode queue;
  //also initialized to the container size.
  info->second.num_missing_decodable = not_yet_fulfilled_dependencies.size();
  // Walk not_yet_fulfilled_dependencies and use each element's continuous flag to adjust
  // info->second.num_missing_continuous, since entries may have been inserted with continuous
  // being either true or false.
  for (const Dependency& dep : not_yet_fulfilled_dependencies) {
    // If a reference is already continuous, decrement this frame's missing-continuous count.
    if (dep.continuous)
      --info->second.num_missing_continuous;
    // Build the reverse reference -> dependent relationship used for state propagation:
    // dep.id is the reference frame's id (for H.264, the forward reference frame), and the
    // current frame's id is appended to that reference's dependent_frames list.
    frames_[dep.id].dependent_frames.push_back(id);
  }
  return true;
}
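
The bookkeeping above boils down to two counters per frame plus a reverse dependency list. The following minimal sketch (illustrative types, not WebRTC's FrameInfo) shows how a P frame's num_missing_continuous drops to 0, and the frame becomes continuous, once its reference is inserted and its continuity is propagated:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

struct FrameInfo {
  size_t num_missing_continuous = 0;
  bool continuous = false;
  std::vector<int64_t> dependent_frames;  // Frames that reference this one.
};

int main() {
  std::map<int64_t, FrameInfo> frames;

  // P frame with picture_id 101 referencing picture_id 100, which has not arrived yet.
  frames[101].num_missing_continuous = 1;
  frames[100].dependent_frames.push_back(101);  // reverse reference -> dependent link

  // Keyframe 100 arrives and is continuous; propagate continuity (the BFS in
  // PropagateContinuity decrements each dependent's missing counter).
  frames[100].continuous = true;
  for (int64_t dep : frames[100].dependent_frames) {
    if (--frames[dep].num_missing_continuous == 0)
      frames[dep].continuous = true;
  }
  std::cout << "frame 101 continuous: " << frames[101].continuous << "\n";  // 1
}
```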

2.2) PropagateContinuity: propagating continuity

//The parameter |start| is the iterator to the current frame's position in the frames_ container.
void FrameBuffer::PropagateContinuity(FrameMap::iterator start) {
  TRACE_EVENT0("webrtc", "FrameBuffer::PropagateContinuity");
  RTC_DCHECK(start->second.continuous);
  std::queue<FrameMap::iterator> continuous_frames;
  continuous_frames.push(start);
  // A simple BFS to traverse continuous frames.
  while (!continuous_frames.empty()) {
    auto frame = continuous_frames.front();
    continuous_frames.pop();
    if (!last_continuous_frame_ || *last_continuous_frame_ < frame->first) {
      last_continuous_frame_ = frame->first;
    }
    // Loop through all dependent frames, and if that frame no longer has
    // any unfulfilled dependencies then that frame is continuous as well.
    //   
    for (size_t d = 0; d < frame->second.dependent_frames.size(); ++d) {
      auto frame_ref = frames_.find(frame->second.dependent_frames[d]);
      RTC_DCHECK(frame_ref != frames_.end());
      // TODO(philipel): Look into why we've seen this happen.
      if (frame_ref != frames_.end()) {
        //For H.264, num_missing_continuous is at most 1.
        --frame_ref->second.num_missing_continuous;
        if (frame_ref->second.num_missing_continuous == 0) {
          frame_ref->second.continuous = true;
          continuous_frames.push(frame_ref);
        }
      }
    }
  }
}

3) How the decode_queue_ decode task queue works

void VideoReceiveStream2::Start() {
  RTC_DCHECK_RUN_ON(&worker_sequence_checker_);
  ....
  decode_queue_.PostTask([this] {
    RTC_DCHECK_RUN_ON(&decode_queue_);
    decoder_stopped_ = false;
    StartNextDecode();
  });
  ....  
}
void VideoReceiveStream2::StartNextDecode() {
  // Running on the decode thread.
  TRACE_EVENT0("webrtc", "VideoReceiveStream2::StartNextDecode");
  frame_buffer_->NextFrame(
      GetMaxWaitMs(),      // Maximum time this run may wait.
      keyframe_required_,  // Whether this run must request a keyframe.
      &decode_queue_,      // The decode task queue.
      /* encoded frame handler */
      [this](std::unique_ptr<EncodedFrame> frame, ReturnReason res) {
        RTC_DCHECK_EQ(frame == nullptr, res == ReturnReason::kTimeout);
        RTC_DCHECK_EQ(frame != nullptr, res == ReturnReason::kFrameFound);
        decode_queue_.PostTask([this, frame = std::move(frame)]() mutable {
          RTC_DCHECK_RUN_ON(&decode_queue_);
          if (decoder_stopped_)
            return;
          if (frame) {
            HandleEncodedFrame(std::move(frame));
          } else {
            int64_t now_ms = clock_->TimeInMilliseconds();
            worker_thread_->PostTask(ToQueuedTask(
                task_safety_, [this, now_ms, wait_ms = GetMaxWaitMs()]() {
                  RTC_DCHECK_RUN_ON(&worker_sequence_checker_);
                  HandleFrameBufferTimeout(now_ms, wait_ms);
                }));
          }
          StartNextDecode();
        });
      });
}

4) The FrameBuffer::NextFrame() workflow

void FrameBuffer::NextFrame(
    int64_t max_wait_time_ms,  // Maximum wait in ms for this round before it counts as a timeout.
    bool keyframe_required,
    rtc::TaskQueue* callback_queue,
    std::function<void(std::unique_ptr<EncodedFrame>, ReturnReason)> handler) {
  RTC_DCHECK_RUN_ON(&callback_checker_);
  RTC_DCHECK(callback_queue->IsCurrent());
  TRACE_EVENT0("webrtc", "FrameBuffer::NextFrame");
  //Current time plus the maximum wait gives the latest return time for this round.
  int64_t latest_return_time_ms =
      clock_->TimeInMilliseconds() + max_wait_time_ms;
  rtc::CritScope lock(&crit_);
  if (stopped_) {
    return;
  }
  //Save the latest return time (in ms) for the current task.
  latest_return_time_ms_ = latest_return_time_ms;
  //Whether this task must request a keyframe.
  keyframe_required_ = keyframe_required;
  //Save the handler, i.e. the outer lambda defined in VideoReceiveStream2::StartNextDecode().
  frame_handler_ = handler;
  //Save the pointer to the decode task queue.
  callback_queue_ = callback_queue;
  StartWaitForNextFrameOnQueue();
}
void FrameBuffer::StartWaitForNextFrameOnQueue() {
  RTC_DCHECK(callback_queue_);
  RTC_DCHECK(!callback_task_.Running());
  int64_t wait_ms = FindNextFrame(clock_->TimeInMilliseconds());
  callback_task_ = RepeatingTaskHandle::DelayedStart(
      callback_queue_->Get(), TimeDelta::Millis(wait_ms), [this] {
        RTC_DCHECK_RUN_ON(&callback_checker_);
        // If this task has not been cancelled, we did not get any new frames
        // while waiting. Continue with frame delivery.
        rtc::CritScope lock(&crit_);
        if (!frames_to_decode_.empty()) {
          // We have frames, deliver!
          frame_handler_(absl::WrapUnique(GetNextFrame()), kFrameFound);
          CancelCallback();
          return TimeDelta::Zero();  // Ignored.
        } else if (clock_->TimeInMilliseconds() >= latest_return_time_ms_) {
          // We have timed out, signal this and stop repeating.
          frame_handler_(nullptr, kTimeout);
          CancelCallback();
          return TimeDelta::Zero();  // Ignored.
        } else {
          // If there's no frames to decode and there is still time left, it
          // means that the frame buffer was cleared between creation and
          // execution of this task. Continue waiting for the remaining time.
          int64_t wait_ms = FindNextFrame(clock_->TimeInMilliseconds());
          return TimeDelta::Millis(wait_ms);
        }
      });
}

5) The FrameBuffer::FindNextFrame() workflow

int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
  //latest_return_time_ms_ is the deadline for this task; subtracting now_ms gives the maximum
  //waiting interval. When debugging 640*480@25fps screen sharing this was around 3000 ms, i.e.
  //the task waits at most 3 s; if no suitable frame is found by then, the round counts as a timeout.
  int64_t wait_ms = latest_return_time_ms_ - now_ms;
  //Clear frames_to_decode_ first: each round fetches one frame (superframe) and immediately
  //hands it to the decode queue.
  frames_to_decode_.clear();
  // |last_continuous_frame_| may be empty below, but nullopt is smaller
  // than everything else and loop will immediately terminate as expected.
  //Iterate over frames_ from beginning to end while frame_it->first <= last_continuous_frame_,
  //i.e. the frame to be handed to the decode queue has an id no newer than the last continuous frame.
  for (auto frame_it = frames_.begin();
        frame_it != frames_.end() && frame_it->first <= last_continuous_frame_;
        ++frame_it) {
    //If this frame is not continuous or still has undecodable references, move on (++frame_it).
    if (!frame_it->second.continuous ||
        frame_it->second.num_missing_decodable > 0) {
      continue;
    }
    EncodedFrame* frame = frame_it->second.frame.get();
    //If this round requires a keyframe but the frame found is a P frame, move on (++frame_it).
    if (keyframe_required_ && !frame->is_keyframe())
      continue;
    auto last_decoded_frame_timestamp =
        decoded_frames_history_.GetLastDecodedFrameTimestamp();
    // TODO(https://bugs.webrtc.org/9974): consider removing this check
    // as it may make a stream undecodable after a very long delay between
    // frames.
    // RTP timestamps differ per frame and a later frame has a larger timestamp; if
    // last_decoded_frame_timestamp (of the last frame handed to the decode queue) is newer
    // than this frame's timestamp, skip this frame.
    if (last_decoded_frame_timestamp &&
        AheadOf(*last_decoded_frame_timestamp, frame->Timestamp())) {
      continue;
    }
    // Only ever return all parts of a superframe. Therefore skip this
    // frame if it's not a beginning of a superframe.
    // VPx (spatial-layer) related handling.
    if (frame->inter_layer_predicted) {
      continue;
    }
    // Gather all remaining frames for the same superframe.
    std::vector<FrameMap::iterator> current_superframe;
    //Append at the tail.
    current_superframe.push_back(frame_it);
    // True for H.264, which has only one spatial layer.
    bool last_layer_completed = frame_it->second.frame->is_last_spatial_layer;
    FrameMap::iterator next_frame_it = frame_it;
    while (true) {
      ++next_frame_it;
      //For H.264 this check breaks out of the loop.
      if (next_frame_it == frames_.end() ||
          next_frame_it->first.picture_id != frame->id.picture_id ||
          !next_frame_it->second.continuous) {
        break;
      }
      // Check if the next frame has some undecoded references other than
      // the previous frame in the same superframe.
      size_t num_allowed_undecoded_refs =
          (next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
      if (next_frame_it->second.num_missing_decodable >
          num_allowed_undecoded_refs) {
        break;
      }
      // All frames in the superframe should have the same timestamp.
      if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
        RTC_LOG(LS_WARNING) << "Frames in a single superframe have different"
                                " timestamps. Skipping undecodable superframe.";
        break;
      }
      current_superframe.push_back(next_frame_it);
      last_layer_completed = next_frame_it->second.frame->is_last_spatial_layer;
    }
    // Check if the current superframe is complete.
    // TODO(bugs.webrtc.org/10064): consider returning all available to
    // decode frames even if the superframe is not complete yet.
    // For H.264, last_layer_completed is true.
    if (!last_layer_completed) {
      continue;
    }
    //Move the current_superframe iterator vector into frames_to_decode_.
    frames_to_decode_ = std::move(current_superframe);
    //If no render time has been set yet (for H.264, frame->RenderTime() == -1 by default), set it here.
    if (frame->RenderTime() == -1) {
      frame->SetRenderTime(timing_->RenderTimeMs(frame->Timestamp(), now_ms));
    }
    //Recompute the wait time. How does this work? It is important and analyzed in depth later.
    wait_ms = timing_->MaxWaitingTime(frame->RenderTime(), now_ms);
    // This will cause the frame buffer to prefer high framerate rather
    // than high resolution in the case of the decoder not decoding fast
    // enough and the stream has multiple spatial and temporal layers.
    // For multiple temporal layers it may cause non-base layer frames to be
    // skipped if they are late.
    // If wait_ms is less than -5 (kMaxAllowedFrameDelayMs is 5), then per the comment above,
    // with a slow decoder and a high frame rate this frame is already too late to render and
    // is skipped.
    if (wait_ms < -kMaxAllowedFrameDelayMs)
      continue;
    //At this point a frame to decode has been found, identified by its iterator position in frames_.
    break;
  }
  //Update the remaining wait time: clamp to the deadline first, then to a minimum of 0. The
  //returned value is an interval; the scheduling timeout may be as large as ~3 s, and after
  //the evaluation above it is re-estimated here. Where does this value take effect?
  wait_ms = std::min<int64_t>(wait_ms, latest_return_time_ms_ - now_ms);
  wait_ms = std::max<int64_t>(wait_ms, 0);
  return wait_ms;
}

6) The RepeatingTaskHandle::DelayedStart delayed repeating task workflow

void FrameBuffer::StartWaitForNextFrameOnQueue() {
  RTC_DCHECK(callback_queue_);
  RTC_DCHECK(!callback_task_.Running());
  int64_t wait_ms = FindNextFrame(clock_->TimeInMilliseconds());
  callback_task_ = RepeatingTaskHandle::DelayedStart(
      callback_queue_->Get(), TimeDelta::Millis(wait_ms), [this] {
        RTC_DCHECK_RUN_ON(&callback_checker_);
        // If this task has not been cancelled, we did not get any new frames
        // while waiting. Continue with frame delivery.
        rtc::CritScope lock(&crit_);
        if (!frames_to_decode_.empty()) {  // There are frames ready to decode.
          // We have frames, deliver!
          frame_handler_(absl::WrapUnique(GetNextFrame()), kFrameFound);
          CancelCallback();
          return TimeDelta::Zero();  // Ignored.
        } else if (clock_->TimeInMilliseconds() >= latest_return_time_ms_) {  // Timed out.
          // We have timed out, signal this and stop repeating.
          frame_handler_(nullptr, kTimeout);
          CancelCallback();
          return TimeDelta::Zero();  // Ignored.
        } else {  // No frame found and not timed out yet.
          // If there's no frames to decode and there is still time left, it
          // means that the frame buffer was cleared between creation and
          // execution of this task. Continue waiting for the remaining time.
          int64_t wait_ms = FindNextFrame(clock_->TimeInMilliseconds());
          return TimeDelta::Millis(wait_ms);
        }
      });
}

6.1) How GetNextFrame() works

EncodedFrame* FrameBuffer::GetNextFrame() {
  RTC_DCHECK_RUN_ON(&callback_checker_);
  int64_t now_ms = clock_->TimeInMilliseconds();
  // TODO(ilnik): remove |frames_out| use frames_to_decode_ directly.
  std::vector<EncodedFrame*> frames_out;
  RTC_DCHECK(!frames_to_decode_.empty());
  //Whether the superframe contains retransmitted frames.
  bool superframe_delayed_by_retransmission = false;
  //Total size of the superframe.
  size_t superframe_size = 0;
  //Take from the head; frames were appended at the tail above, so this is first-in first-out.
  EncodedFrame* first_frame = frames_to_decode_[0]->second.frame.get();
  //Expected render time, set in FindNextFrame.
  int64_t render_time_ms = first_frame->RenderTime();
  /* Receive time of the last packet of the current frame. Subtracting the receive time from
   * the render time gives the delay from frame assembly through decoding to rendering.
   * Debugging shows this delay is indeed quite large: on a Huawei Mate 30 at 1920*1080@30fps
   * it averages roughly 130 ms and needs optimization. The delay from receiving the frame's
   * last packet to this point is 5~30 ms, so decode-to-render alone takes at least 100 ms
   * (measured on a LAN). Getting this below 50 ms would make the overall latency excellent.
   */
  int64_t receive_time_ms = first_frame->ReceivedTime();
  // Gracefully handle bad RTP timestamps and render time issues.
  // Check whether the frame's render time or the current target delay is abnormal; if so,
  // reset the timing handler and recompute the render time. The rules are analyzed below.
  if (HasBadRenderTiming(*first_frame, now_ms)) {
    jitter_estimator_.Reset();
    timing_->Reset();
    render_time_ms = timing_->RenderTimeMs(first_frame->Timestamp(), now_ms);
  }
  // Iterate over all frames to decode (they should share the same timestamp); with multiple
  // frames they are finally combined into one superframe. In practice it is almost always a
  // single frame.
  for (FrameMap::iterator& frame_it : frames_to_decode_) {
    RTC_DCHECK(frame_it != frames_.end());
    //Release the frame owned by the FrameInfo in frames_; the local |frame| takes ownership.
    EncodedFrame* frame = frame_it->second.frame.release();
    //All frames delivered in one round share the same render time. Set it on every frame; the
    //set is finally combined into one large frame and sent to the decode queue.
    frame->SetRenderTime(render_time_ms);
    //OR across iterations: becomes true if any frame in the set was delayed by retransmission.
    superframe_delayed_by_retransmission |= frame->delayed_by_retransmission();
    //Take the maximum receive time; e.g. if frames_to_decode_ holds 5 frames, use the latest one.
    receive_time_ms = std::max(receive_time_ms, frame->ReceivedTime());
    //Accumulate the sizes of all frames.
    superframe_size += frame->size();
    //Propagate decodability. What is this used for?
    PropagateDecodability(frame_it->second);
    //Record the frame about to be sent to the decode queue in the decoded-frames history, used
    //for bookkeeping of frames already handed off.
    decoded_frames_history_.InsertDecoded(frame_it->first, frame->Timestamp());
    // Remove decoded frame and all undecoded frames before it.
    // Stats callback: std::count_if counts how many frames before frame_it will be dropped.
    if (stats_callback_) {
      unsigned int dropped_frames = std::count_if(
          frames_.begin(), frame_it,
          [](const std::pair<const VideoLayerFrameId, FrameInfo>& frame) {
            return frame.second.frame != nullptr;
          });
      if (dropped_frames > 0) {
        stats_callback_->OnDroppedFrames(dropped_frames);
      }
    }
    //Erase the frame being handed off (and everything before it) from the cache.
    frames_.erase(frames_.begin(), ++frame_it);
    //The released frame is stored in frames_out; in the end all frames in this set are
    //combined into one superframe.
    frames_out.push_back(frame);
  }
  //If none of the frames to be sent to the decode queue were delayed by retransmission:
  if (!superframe_delayed_by_retransmission) {
    int64_t frame_delay;
    //Compute the inter-frame delay.
    if (inter_frame_delay_.CalculateDelay(first_frame->Timestamp(),
                                          &frame_delay, receive_time_ms)) {
      //frame_delay may be negative.
      jitter_estimator_.UpdateEstimate(frame_delay, superframe_size);
    }
    //protection_mode_ defaults to kProtectionNack.
    float rtt_mult = protection_mode_ == kProtectionNackFEC ? 0.0 : 1.0;
    absl::optional<float> rtt_mult_add_cap_ms = absl::nullopt;
    //If rtt_mult_settings_ has a value, use it below when applying RTT to the jitter delay.
    if (rtt_mult_settings_.has_value()) {
      //Can be enabled and configured via a field trial like "WebRTC-RttMult/Enable-0.60,100.0/"; unset by default.
      rtt_mult = rtt_mult_settings_->rtt_mult_setting;
      rtt_mult_add_cap_ms = rtt_mult_settings_->rtt_mult_add_cap_ms;
    }
    //Set the jitter delay.
    timing_->SetJitterDelay(
        jitter_estimator_.GetJitterEstimate(rtt_mult, rtt_mult_add_cap_ms));
    //Update the current delay.
    timing_->UpdateCurrentDelay(render_time_ms, now_ms);
  } else {
    //If there are retransmitted frames, the jitter estimator is updated via FrameNacked instead.
    if (RttMultExperiment::RttMultEnabled() || add_rtt_to_playout_delay_)
      jitter_estimator_.FrameNacked();
  }
  //Update jitter-delay statistics.
  UpdateJitterDelay();
  //Update timing-frame information.
  UpdateTimingFrameInfo();
  //If there is only one frame, return frames_out[0] directly.
  if (frames_out.size() == 1) {
    return frames_out[0];
  } else {
    //Combine into a superframe.
    return CombineAndDeleteFrames(frames_out);
  }
}

6.2) How HasBadRenderTiming() works

bool FrameBuffer::HasBadRenderTiming(const EncodedFrame& frame,
                                      int64_t now_ms) {
  // Assume that render timing errors are due to changes in the video stream.
  int64_t render_time_ms = frame.RenderTimeMs();
  // Zero render time means render immediately.
  if (render_time_ms == 0) {
    return false;
  }
  if (render_time_ms < 0) {
    return true;
  }
  const int64_t kMaxVideoDelayMs = 10000;
  if (std::abs(render_time_ms - now_ms) > kMaxVideoDelayMs) {
    int frame_delay = static_cast<int>(std::abs(render_time_ms - now_ms));
    RTC_LOG(LS_WARNING)
        << "A frame about to be decoded is out of the configured "
            "delay bounds ("
        << frame_delay << " > " << kMaxVideoDelayMs
        << "). Resetting the video jitter buffer.";
    return true;
  }
  if (static_cast<int>(timing_->TargetVideoDelay()) > kMaxVideoDelayMs) {
    RTC_LOG(LS_WARNING) << "The video target delay has grown larger than "
                        << kMaxVideoDelayMs << " ms.";
    return true;
  }
  return false;
}

7) Summary


Source: WebRTC Video Receiver (7) - Kalman-filter-based smoothed render-time estimation

1) Introduction

2) Updating PlayoutDelay

#common_types.h
struct PlayoutDelay {
  PlayoutDelay(int min_ms, int max_ms) : min_ms(min_ms), max_ms(max_ms) {}
  int min_ms;  // Minimum playout delay.
  int max_ms;  // Maximum playout delay.
  ....  
}
#rtp_video_header.h
struct RTPVideoHeader {
  ....  
  PlayoutDelay playout_delay = {-1, -1};
  ....
}
## encoded_image.h
class RTC_EXPORT EncodedImage {
  public:
  ...  
  // When an application indicates non-zero values here, it is taken as an
  // indication that all future frames will be constrained with those limits
  // until the application indicates a change again.
  PlayoutDelay playout_delay_ = {-1, -1};
  ...
}
RtpFrameObject::RtpFrameObject(
    ......
    : first_seq_num_(first_seq_num),
      last_seq_num_(last_seq_num),
      last_packet_received_time_(last_packet_received_time),
      times_nacked_(times_nacked) {
  // Setting frame's playout delays to the same values
  // as of the first packet's.
  SetPlayoutDelay(rtp_video_header_.playout_delay);
  ...
}
"http://www.webrtc.org/experiments/rtp-hdrext/playout-delay"

void VideoReceiveStream2::OnCompleteFrame(
    std::unique_ptr<video_coding::EncodedFrame> frame) {
  ....
  //Get a reference to the PlayoutDelay.
  const PlayoutDelay& playout_delay = frame->EncodedImage().playout_delay_;
  if (playout_delay.min_ms >= 0) {
    frame_minimum_playout_delay_ms_ = playout_delay.min_ms;
    UpdatePlayoutDelays();
  }
  if (playout_delay.max_ms >= 0) {
    frame_maximum_playout_delay_ms_ = playout_delay.max_ms;
    UpdatePlayoutDelays();
  }
  ....
}
void VCMTiming::set_min_playout_delay(int min_playout_delay_ms) {
  rtc::CritScope cs(&crit_sect_);
  min_playout_delay_ms_ = min_playout_delay_ms;
}

void VCMTiming::set_max_playout_delay(int max_playout_delay_ms) {
  rtc::CritScope cs(&crit_sect_);
  max_playout_delay_ms_ = max_playout_delay_ms;
}

3) How RenderTimeMs is set

int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
  ....
  //By default the expected render time has not yet been assigned when execution reaches this point.
  if (frame->RenderTime() == -1) {
      //First ask VCMTiming for the expected render time, then store it on the frame for later use.
      frame->SetRenderTime(timing_->RenderTimeMs(frame->Timestamp(), now_ms));
  }
  ...  
  //Derive the maximum waiting time.
  wait_ms = timing_->MaxWaitingTime(frame->RenderTime(), now_ms);
  ....
  //Clamp: if it falls within this round's (non-timed-out) window, wait_ms is returned.
  wait_ms = std::min<int64_t>(wait_ms, latest_return_time_ms_ - now_ms);
  wait_ms = std::max<int64_t>(wait_ms, 0);  
  return wait_ms;
}

3.1) VCMTiming: obtaining the expected render time

int64_t VCMTiming::RenderTimeMs(uint32_t frame_timestamp,
                                int64_t now_ms) const {
  rtc::CritScope cs(&crit_sect_);
  return RenderTimeMsInternal(frame_timestamp, now_ms);
}
//frame_timestamp is the current frame's timestamp in 1/90000 s units; now_ms is the current clock time.
int64_t VCMTiming::RenderTimeMsInternal(uint32_t frame_timestamp,
                                        int64_t now_ms) const {
  //If min_playout_delay_ms_ == 0 and max_playout_delay_ms_ == 0, render immediately.
  // Setting them to 0 is not recommended, because the jitter delay then has no effect.
  if (min_playout_delay_ms_ == 0 && max_playout_delay_ms_ == 0) {
    // Render as soon as possible.
    return 0;
  }
  //Pass in the current frame's timestamp to get a smoothed time; TimestampExtrapolator uses a
  //Kalman filter to produce the expected receive (completion) time.
  int64_t estimated_complete_time_ms =
      ts_extrapolator_->ExtrapolateLocalTime(frame_timestamp);
  if (estimated_complete_time_ms == -1) {
    estimated_complete_time_ms = now_ms;
  }
  // Make sure the actual delay stays in the range of |min_playout_delay_ms_|
  // and |max_playout_delay_ms_|.
  // Take the max with min_playout_delay_ms_ (default -1).
  int actual_delay = std::max(current_delay_ms_, min_playout_delay_ms_);
  //Take the min with max_playout_delay_ms_ (default -1).
  actual_delay = std::min(actual_delay, max_playout_delay_ms_);
  return estimated_complete_time_ms + actual_delay;
}

3.2) VCMTiming: obtaining the scheduling wait time

int64_t VCMTiming::MaxWaitingTime(int64_t render_time_ms,
                                  int64_t now_ms) const {
  rtc::CritScope cs(&crit_sect_);
  const int64_t max_wait_time_ms =
      render_time_ms - now_ms - RequiredDecodeTimeMs() - render_delay_ms_;
  return max_wait_time_ms;
}
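
In other words, the wait time is simply the render deadline minus the time still needed for decoding and rendering. A numeric sketch (all figures assumed for illustration, not WebRTC defaults):

```cpp
#include <cstdint>
#include <iostream>

// Illustrative numbers: a frame is due to render 120 ms from now, decoding is assumed to take
// 15 ms and the render pipeline 10 ms, so the frame buffer may wait up to 95 ms before handing
// the frame to the decoder.
int main() {
  int64_t now_ms = 1'000'000;
  int64_t render_time_ms = now_ms + 120;   // from VCMTiming::RenderTimeMs()
  int64_t required_decode_time_ms = 15;    // assumed decode cost
  int64_t render_delay_ms = 10;            // assumed render pipeline delay
  int64_t max_wait_time_ms =
      render_time_ms - now_ms - required_decode_time_ms - render_delay_ms;
  std::cout << "max_wait_time_ms = " << max_wait_time_ms << "\n";  // 95
}
```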

4) TimestampExtrapolator: Kalman-filter expected render-time estimation

4.1) The TimestampExtrapolator Kalman model

// Local time in webrtc time base.
int64_t current_time_us = clock_->TimeInMicroseconds();
int64_t current_time_ms = current_time_us / rtc::kNumMicrosecsPerMillisec;
// Capture time may come from clock with an offset and drift from clock_.
int64_t capture_ntp_time_ms = current_time_ms + delta_ntp_internal_ms_;
// Convert NTP time, in ms, to RTP timestamp.
const int kMsToRtpTimestamp = 90;
uint32_t timestamp_rtp =
    kMsToRtpTimestamp * static_cast<uint32_t>(capture_ntp_time_ms);
fps = 60fps
samplerate = 90000    
timestampDiff(k) = rtpTimeStamp(k) - rtpTimeStamp(0)          (4.1.1)
timestampDiffToMs(k) = timestampDiff(k) * 1000 / samplerate   (4.1.2)

t(0) = _startMs
t(k) = timestampDiffToMs(k) + t(0)    (4.1.3)
t(k) = timestampDiffToMs(k) + t(0) + error(k)   (4.1.4)
t(k) = (timestampDiff(k) - jitterTimestamp(k))  / sampleratePermillage(k) + t(0)   (4.1.5)
w(k) = w(k-1) + u(k-1)          P(u) ~ (0,Q)    (4.1.6)      
w_bar(k) = [sampleratePermillage(k)  jitterTimestamp(k)]^T
```

  * Define the target two-dimensional state vector `w_bar(k)`.

  * `u(k-1)` is process noise following a normal distribution; since the samples `samplerate_permillage(k)` and `jitterTimestamp(k)` are independent, its covariance matrix Q can apparently be taken as 0.

  * The state transition equation written in matrix form:

![](./image/20210716-180000-5.png)

  * At the same time, build the following observation equation:

```cpp
timestampDiff(k) = t_bar(k)^T * w_bar(k) + v(k)          P(v) ~ (0,R)    (4.1.7)
t_bar(k) = [recvTimeMsDiff(k)  1]^T
```

  * `v(k)` is measurement noise following a normal distribution; its covariance matrix R is taken as 1.

  * `t_bar(k)` is the coefficient vector of the observation equation for frame (k).

  * `recvTimeMsDiff(k)` is the difference between the local receive times of frame (k) and the first frame.

  * The observation equation written in matrix form:

![](./image/20210716-180000-6.png)

  * The residual (innovation) formula:

```cpp
residual(k) = timestampDiff(k) - t_bar(k)^T * w_hat(k-1)       (4.1.8)
```

4.2) TimestampExtrapolator: computing the expected receive time

int64_t TimestampExtrapolator::ExtrapolateLocalTime(uint32_t timestamp90khz) {
  ReadLockScoped rl(*_rwLock);
  int64_t localTimeMs = 0;
  CheckForWrapArounds(timestamp90khz);
  double unwrapped_ts90khz =
      static_cast<double>(timestamp90khz) +
      _wrapArounds * ((static_cast<int64_t>(1) << 32) - 1);
  if (_packetCount == 0) {
    localTimeMs = -1;
  } else if (_packetCount < _startUpFilterDelayInPackets) {
    localTimeMs =
        _prevMs +
        static_cast<int64_t>(
            static_cast<double>(unwrapped_ts90khz - _prevUnwrappedTimestamp) /
                90.0 +
            0.5);
  } else {
    if (_w[0] < 1e-3) {
      localTimeMs = _startMs;
    } else {
      double timestampDiff =
          unwrapped_ts90khz - static_cast<double>(_firstTimestamp);
      localTimeMs = static_cast<int64_t>(static_cast<double>(_startMs) +
                                          (timestampDiff - _w[1]) / _w[0] + 0.5);
    }
  }
  return localTimeMs;
}
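
Once the filter has converged (the final branch above), the extrapolation is a straight line: expected receive time = `_startMs + (timestampDiff - _w[1]) / _w[0]`. A numeric sketch with an idealized 90 kHz clock (assumed values, for illustration only):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Assumed, idealized filter state: 90 RTP ticks per local millisecond, zero offset.
  double w0 = 90.0;        // estimated sample rate in ticks per ms (_w[0])
  double w1 = 0.0;         // estimated offset in ticks (_w[1])
  int64_t start_ms = 500;  // local receive time of the first frame (_startMs)

  // A frame whose RTP timestamp is 9000 ticks after the first frame, i.e. 100 ms later.
  double timestamp_diff = 9000.0;
  int64_t local_time_ms =
      static_cast<int64_t>(start_ms + (timestamp_diff - w1) / w0 + 0.5);
  std::cout << "expected receive time = " << local_time_ms << " ms\n";  // 600
}
```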

4.3) TimestampExtrapolator: Kalman prediction and correction

//|tMs| is the actual receive time of the current frame.
//|ts90khz| is the current frame's RTP timestamp.
void TimestampExtrapolator::Update(int64_t tMs, uint32_t ts90khz) {
    _rwLock->AcquireLockExclusive();
  //1) Initialization for the first frame.
  if (tMs - _prevMs > 10e3) {  // First frame, or no complete frame received for 10 seconds: reset.
    // Ten seconds without a complete frame.
    // Reset the extrapolator
    _rwLock->ReleaseLockExclusive();
    Reset(tMs);
    _rwLock->AcquireLockExclusive();
  } else {
    _prevMs = tMs;
  } 
  //2) Compute deltaRecvTimeMs(k) from the current frame's local receive time.
  // Remove offset to prevent badly scaled matrices
  // The current frame's receive time minus the first frame's receive time gives their local
  // receive-time difference, denoted here deltaRecvTimeMs = tMs - _startMs.
  int64_t recvTimeMsDiff = tMs - _startMs;
  CheckForWrapArounds(ts90khz);
  int64_t unwrapped_ts90khz =
      static_cast<int64_t>(ts90khz) +
      _wrapArounds * ((static_cast<int64_t>(1) << 32) - 1);
  if (_firstAfterReset) {  // Assign initial values after a reset.
    // Make an initial guess of the offset,
    // should be almost correct since tMs - _startMs
    // should about zero at this time.
    _w[1] = -_w[0] * tMs;
    _firstTimestamp = unwrapped_ts90khz;
    _firstAfterReset = false;
  }
  /*3) Use the previous optimal estimate to compute the residual, preparing for the a
       posteriori estimate; corresponds to formula (4) of the five core Kalman formulas and to
       (4.1.8): current frame's RTP timestamp - first frame's timestamp
       - deltaRecvTimeMs * _w[0] - _w[1], where deltaRecvTimeMs * _w[0] (the previous optimal
       sample rate) gives the expected RTP timestamp difference.
  */
  double residual = (static_cast<double>(unwrapped_ts90khz) - _firstTimestamp) -
                    static_cast<double>(recvTimeMsDiff) * _w[0] - _w[1];
  if (DelayChangeDetection(residual) &&
      _packetCount >= _startUpFilterDelayInPackets) {
    // A sudden change of average network delay has been detected.
    // Force the filter to adjust its offset parameter by changing
    // the offset uncertainty. Don't do this during startup.
    _pP[1][1] = _pP11;
  }
  if (_prevUnwrappedTimestamp >= 0 &&
      unwrapped_ts90khz < _prevUnwrappedTimestamp) {
    // Drop reordered frames.
    _rwLock->ReleaseLockExclusive();
    return;
  }
  // T = [t(k) 1]';
  // that = T'*w;
  // K = P*T/(lambda + T'*P*T);
  // 4) Compute the Kalman gain.
  double K[2];
  // Numerator of formula (3) of the five core formulas.
  K[0] = _pP[0][0] * recvTimeMsDiff + _pP[0][1];
  K[1] = _pP[1][0] * recvTimeMsDiff + _pP[1][1];
  // Denominator of formula (3) of the five core formulas.
  double TPT = _lambda + recvTimeMsDiff * K[0] + K[1];
  K[0] /= TPT;
  K[1] /= TPT;
  //5) Correct using the Kalman gain to obtain the a posteriori estimate.
  // w = w + K*(ts(k) - that);
  _w[0] = _w[0] + K[0] * residual;
  _w[1] = _w[1] + K[1] * residual;
  //6) Update the error covariance.
  // P = 1/lambda*(P - K*T'*P);
  double p00 =
      1 / _lambda * (_pP[0][0] - (K[0] * recvTimeMsDiff * _pP[0][0] + K[0] * _pP[1][0]));
  double p01 =
      1 / _lambda * (_pP[0][1] - (K[0] * recvTimeMsDiff * _pP[0][1] + K[0] * _pP[1][1]));
  _pP[1][0] =
      1 / _lambda * (_pP[1][0] - (K[1] * recvTimeMsDiff * _pP[0][0] + K[1] * _pP[1][0]));
  _pP[1][1] =
      1 / _lambda * (_pP[1][1] - (K[1] * recvTimeMsDiff * _pP[0][1] + K[1] * _pP[1][1]));
  _pP[0][0] = p00;
  _pP[0][1] = p01;
  _prevUnwrappedTimestamp = unwrapped_ts90khz;
  if (_packetCount < _startUpFilterDelayInPackets) {
    _packetCount++;
  }
  _rwLock->ReleaseLockExclusive();
}
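
For reference, the "five core Kalman formulas" that the numbered comments above refer to, written out in standard discrete-Kalman-filter form (my summary, not WebRTC source; in this filter the state x is `w_bar`, the observation row H is `t_bar^T`, and the covariance update is additionally scaled by 1/_lambda, acting as a forgetting factor, instead of adding a process noise Q):

```
\hat{x}^-(k) = \hat{x}(k-1)                                     % (1) prediction
P^-(k)       = P(k-1) + Q                                       % (2) a priori covariance
K(k)         = \frac{P^-(k) H^T}{H P^-(k) H^T + R}              % (3) Kalman gain
\hat{x}(k)   = \hat{x}^-(k) + K(k)\,(z(k) - H\,\hat{x}^-(k))    % (4) correction via the residual
P(k)         = (I - K(k) H)\, P^-(k)                            % (5) covariance update
```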
expectRenderTime = expectRecvTime + actual_delay

5) Computing the expected render time

void VCMTiming::SetJitterDelay(int jitter_delay_ms) {
  rtc::CritScope cs(&crit_sect_);
  if (jitter_delay_ms != jitter_delay_ms_) {
    jitter_delay_ms_ = jitter_delay_ms;
    // When in initial state, set current delay to minimum delay.
    if (current_delay_ms_ == 0) {
      current_delay_ms_ = jitter_delay_ms_;
    }
  }
}
void VCMTiming::UpdateCurrentDelay(int64_t render_time_ms,
                                    int64_t actual_decode_time_ms) {
  rtc::CritScope cs(&crit_sect_);
  uint32_t target_delay_ms = TargetDelayInternal();  // Target delay.
  //Compute the actual delay.
  int64_t delayed_ms =
      actual_decode_time_ms -
      (render_time_ms - RequiredDecodeTimeMs() - render_delay_ms_);
  if (delayed_ms < 0) {
    return;
  }
  if (current_delay_ms_ + delayed_ms <= target_delay_ms) {
    current_delay_ms_ += delayed_ms;
  } else {
    current_delay_ms_ = target_delay_ms;
  }
}
int VCMTiming::TargetDelayInternal() const {
  //Target delay = jitter_delay_ms_ + required decode time + render delay.
  return std::max(min_playout_delay_ms_,
                  jitter_delay_ms_ + RequiredDecodeTimeMs() + render_delay_ms_);
}
current_delay_ms_ = inter-frame jitter delay + decode time + render delay
expectRenderTime = expectRecvTime + actual_delay
                 = expected receive time + inter-frame jitter delay + decode time + render delay
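
Putting the pieces together with assumed numbers (illustration only): if the extrapolator predicts the frame completes arriving at t = 600 ms, the jitter delay is 30 ms, decoding takes 15 ms and rendering 10 ms, then the expected render time comes out to 655 ms:

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Assumed values for illustration only.
  int64_t expect_recv_time_ms = 600;  // from TimestampExtrapolator
  int jitter_delay_ms = 30;           // from VCMJitterEstimator
  int decode_time_ms = 15;            // RequiredDecodeTimeMs()
  int render_delay_ms = 10;           // render_delay_ms_

  int actual_delay_ms = jitter_delay_ms + decode_time_ms + render_delay_ms;
  int64_t expect_render_time_ms = expect_recv_time_ms + actual_delay_ms;
  std::cout << "expected render time = " << expect_render_time_ms << " ms\n";  // 655
}
```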

6) Summary


Source: WebRTC Video Receiver (8) - Analysis of JitterDelay based on a Kalman filter model

1) Introduction

2) The arrival-time model

d(i) = t(i) - t(i-1) - (T(i) - T(i-1))        (2.1.1)
     = L(i)/C(i) - L(i-1)/C(i-1) + w(i)
       L(i)-L(i-1)
     = -------------- + w(i)
           C(i)
     = dL(i)/C(i) + w(i)
w(i) = m(i) + v(i)
d(i) = dL(i)/C(i) + m(i) + v(i)
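
The same model written in LaTeX, with the meaning of each symbol spelled out (the glossary is my addition, following the usual reading of this model): t(i) is the local arrival time of frame i, T(i) its RTP send timestamp, L(i) its size in bytes, C(i) the channel transmission rate, m(i) the slowly varying network queuing delay and v(i) zero-mean noise.

```
d(i) = \bigl(t(i) - t(i-1)\bigr) - \bigl(T(i) - T(i-1)\bigr)
     = \frac{dL(i)}{C(i)} + m(i) + v(i), \qquad dL(i) = L(i) - L(i-1)
```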

3) Kalman filter modeling and derivation

3.1 Kalman filter - building the state-space equation

theta(i) = A * theta(i-1) + u(i-1)        P(u) ~ (0,Q)         
         = theta(i-1) + u(i-1)            (3.1.1)
Q(i) = E{u_bar(i) * u_bar(i)^T}
diag(Q(i)) = [10^-13 10^-3]^T
theta_bar(i) = [1/C(i)  m(i)]^T

3.2 Kalman filter - building the observation equation

d(i) = H * theta(i) + v(i)          P(V) ~ (0,R)      (3.2.2)
h_bar(i) = [dL(i)  1]^T
H = h_bar(i)^T = [dL(i)  1]
d(i) = h_bar(i)^T * theta_bar(i) + v(i)          (3.2.3)
variance var_v = sigma(v,i)^2
R(i) = E{v_bar(i) * v_bar(i)^T}  
     = var_v
d(i) = h_bar(i)^T * theta_bar(i) + v(i)
    = [dL(i)  1] * [1/C(i)  m(i)]^T + v(i)       (3.2.4)

3.3 Kalman filter - prediction: the a priori estimate

theta_hat^-(i) = theta_hat(i-1) + u(i-1)             (3.3.1)

3.4 Kalman filter - prediction: the a priori error covariance

e^-(i) = theta(i) - theta_hat^-(i)                      P(E(i)) ~ (0 , P)   (3.4.1)
P^-(i) = {e^-(i) * e^-(i)^T}
       = E{(theta(i) - theta_hat^-(i)) * (theta(i) - theta_hat^-(i))^T}     (3.4.2)
       = A * P(i-1) * A^T + Q                                               (3.4.3)
       = P(i-1) + Q 
       = E(i-1) + Q                                                         (3.4.4)

3.5 Kalman filter - correction: the Kalman gain

P^-(i) * H^T
k_bar(i) = ------------------------------------------------------
              H * P^-(i) * H^T + R                   
                  P^-(i) * h_bar(i)
        = ------------------------------------------------------            (3.5.1)
              h_bar(i)^T * P^-(i) * h_bar(i) + R                   
                ( E(i-1) + Q(i) ) * h_bar(i)
        = ------------------------------------------------------            (3.5.2)
          var_v_hat(i) + h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i)
The variance var_v(i) = sigma_v(i)^2 is estimated using an exponential averaging filter, modified for variable sampling rate
var_v_hat(i) = max(beta * var_v_hat(i-1) + (1-beta) * z(i)^2, 1)                    (3.5.3)
beta = (1-chi)^(30/(1000 * f_max))                                                  (3.5.4)

3.6 Kalman filter - correction: the a posteriori estimate

theta_hat(i) = theta_hat^-(i) + k_bar(i) * (d(i) - H * theta_hat^-(i))
             = theta_hat(i-1) + k_bar(i) * (d(i) - H * theta_hat(i-1))              (3.6.1)
             = theta_hat(i-1) + k_bar(i) * d(i) - k_bar(i) * H * theta_hat(i-1)     
             = (1 - k_bar(i) * H) * theta_hat(i-1)  + k_bar(i) * d(i)               (3.6.2)
k_bar(i) ~ [0 ~ 1/H]
z(i) = d(i) - h_bar(i)^T * theta_hat(i-1)                                           (3.6.3)
theta_hat(i) = theta_hat(i-1) + z(i) * k_bar(i)                                     (3.6.4)

3.7 Kalman filter - updating the error covariance

e(i) = theta(i) - theta_hat(i)                      P(E(i)) ~ (0 , P)               (3.7.1)
P(i) = E{e(i) * e(i)^T}                                                             (3.7.2)
     = E{(theta(i) - theta_hat(i)) * (theta(i) - theta_hat(i))^T}                   (3.7.3)
     = E(i)
P(i) = (I - k_bar(i) * H) * P^-(i)                                                 (3.7.4)
     = (I - k_bar(i) * h_bar(i)^T) * (E(i-1) + Q(i))                               (3.7.5)
     = E(i)
3.8 Kalman filter - system model diagram

m(i) = (1 - K(i)) * m(i-1) + K(i) * dm(i)

4) How JitterDelay is computed and iterated in WebRTC

bool VCMInterFrameDelay::CalculateDelay(uint32_t timestamp,
                                        int64_t* delay,
                                        int64_t currentWallClock) {
  .....
  // Compute the compensated timestamp difference and convert it to ms and round
  // it to closest integer.
  _dTS = static_cast<int64_t>(
      (timestamp + wrapAroundsSincePrev * (static_cast<int64_t>(1) << 32) -
        _prevTimestamp) /
          90.0 +
      0.5);
  // frameDelay is the difference of dT and dTS -- i.e. the difference of the
  // wall clock time difference and the timestamp difference between two
  // following frames.
  *delay = static_cast<int64_t>(currentWallClock - _prevWallClock - _dTS);
  _prevTimestamp = timestamp;
  _prevWallClock = currentWallClock;
  return true;
}

4.1) Computing JitterDelay

int VCMJitterEstimator::GetJitterEstimate(
    double rttMultiplier,
    absl::optional<double> rttMultAddCapMs) {
  //Call CalculateEstimate() to compute the current jitter delay; OPERATING_SYSTEM_JITTER
  //defaults to 10 ms, which implies the minimum jitter delay is at least 10 ms by default?
  double jitterMS = CalculateEstimate() + OPERATING_SYSTEM_JITTER;
  uint64_t now = clock_->TimeInMicroseconds();
  //kNackCountTimeoutMs = 60000.
  // FrameNacked updates _latestNackTimestamp (in microseconds).
  //If no frame has been NACKed within the last minute, reset the count.
  if (now - _latestNackTimestamp > kNackCountTimeoutMs * 1000)
    _nackCount = 0;
  if (_filterJitterEstimate > jitterMS)
    jitterMS = _filterJitterEstimate;
  if (_nackCount >= _nackLimit) {//_nackLimit
    if (rttMultAddCapMs.has_value()) {
      jitterMS +=
          std::min(_rttFilter.RttMs() * rttMultiplier, rttMultAddCapMs.value());
    } else {
      jitterMS += _rttFilter.RttMs() * rttMultiplier;
    }
  }
  ....
  return rtc::checked_cast<int>(std::max(0.0, jitterMS) + 0.5);
}
// Calculates the current jitter estimate from the filtered estimates.
double VCMJitterEstimator::CalculateEstimate() {
  double ret = _theta[0] * (_maxFrameSize - _avgFrameSize) + NoiseThreshold();
  .......
  _prevEstimate = ret;
  return ret;
}
double VCMJitterEstimator::NoiseThreshold() const {
  double noiseThreshold = _noiseStdDevs * sqrt(_varNoise) - _noiseStdDevOffset;
  if (noiseThreshold < 1.0) {
    noiseThreshold = 1.0;
  }
  return noiseThreshold;
}
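
The estimate therefore has two parts: a frame-size term, `_theta[0] * (_maxFrameSize - _avgFrameSize)`, which is the extra serialization delay of a maximum-size frame over an average one on the estimated channel, plus a noise term derived from the queuing-noise standard deviation. A numeric sketch with assumed filter state (illustration only, not measured values):

```cpp
#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
  // Assumed filter state, for illustration only.
  double theta0 = 0.005;                // _theta[0]: 1/C(i), ms per extra byte
  double max_frame_size = 12000.0;      // _maxFrameSize in bytes
  double avg_frame_size = 4000.0;       // _avgFrameSize in bytes
  double noise_std_devs = 2.33;         // _noiseStdDevs
  double var_noise = 16.0;              // _varNoise
  double noise_std_dev_offset = 30.0;   // _noiseStdDevOffset

  double noise_threshold =
      std::max(noise_std_devs * std::sqrt(var_noise) - noise_std_dev_offset, 1.0);
  // Size term: extra serialization delay of a max-size frame vs. an average frame.
  double jitter_ms = theta0 * (max_frame_size - avg_frame_size) + noise_threshold;
  std::cout << "jitter estimate = " << jitter_ms << " ms\n";  // 0.005*8000 + 1 = 41 ms
}
```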

4.2) The JitterDelay iterative update mechanism

// Updates the estimates with the new measurements.
void VCMJitterEstimator::UpdateEstimate(int64_t frameDelayMS,
                                        uint32_t frameSizeBytes,
                                        bool incompleteFrame /* = false */) {
  // 1) Compute the size difference between the current and the previous frame.
  int deltaFS = frameSizeBytes - _prevFrameSize;
  // 2) Compute _avgFrameSize, the average frame size.
  if (_fsCount < kFsAccuStartupSamples) {
    _fsSum += frameSizeBytes;
    _fsCount++;
  } else if (_fsCount == kFsAccuStartupSamples) {  // kFsAccuStartupSamples is 5; start averaging after 5 frames.
    // Give the frame size filter.
    _avgFrameSize = static_cast<double>(_fsSum) / static_cast<double>(_fsCount);
    _fsCount++;
  }
  /* 3) Run the frame size through the moving-average filter if the frame is complete
     or larger than the current average (e.g. an I-frame is clearly larger). Since
     incompleteFrame defaults to false, every frame normally updates the average. */
  if (!incompleteFrame || frameSizeBytes > _avgFrameSize) {
    // Exponential moving average with _phi = 0.97, i.e. roughly the average size of the last ~30 frames.
    double avgFrameSize = _phi * _avgFrameSize + (1 - _phi) * frameSizeBytes;
    // A large frame (e.g. a key frame) fails the check below, so an outlier does not overwrite _avgFrameSize.
    if (frameSizeBytes < _avgFrameSize + 2 * sqrt(_varFrameSize)) {
      // Only update the average frame size if this sample wasn't a key frame.
      _avgFrameSize = avgFrameSize;
    }
    // Update the variance anyway since we want to capture cases where we only
    // get key frames.
    // 3.1) Update the frame-size variance (initialized to 100, floored at 1.0).
    //      _varFrameSize reflects how uniform the frame sizes are: the larger the
    //      variance, the more the frames deviate from the average size.
    _varFrameSize = VCM_MAX(
        _phi * _varFrameSize + (1 - _phi) * (frameSizeBytes - avgFrameSize) *
                                    (frameSizeBytes - avgFrameSize),
        1.0);
  }
  // Update max frameSize estimate.
  // 4) Track the maximum frame size (decayed by _psi every frame).
  _maxFrameSize =
      VCM_MAX(_psi * _maxFrameSize, static_cast<double>(frameSizeBytes));
  if (_prevFrameSize == 0) {
    _prevFrameSize = frameSizeBytes;
    return;
  }
  // Remember the current frame size as the previous frame size.
  _prevFrameSize = frameSizeBytes;
  // Cap frameDelayMS based on the current time deviation noise.
  /* 5) Clamp frameDelayMS based on the current timing noise. _varNoise is the noise
     variance (default 4.0; it changes during transmission) and
     time_deviation_upper_bound_ is the bound in standard deviations (default 3.5),
     so the initial max_time_deviation_ms is 7 ms. frameDelayMS is clamped to
     +/-max_time_deviation_ms; the larger the noise variance, the wider the clamp,
     and the closer the value passed on is to the raw frameDelayMS. */
  int64_t max_time_deviation_ms =
      static_cast<int64_t>(time_deviation_upper_bound_ * sqrt(_varNoise) + 0.5);
  frameDelayMS = std::max(std::min(frameDelayMS, max_time_deviation_ms),
                          -max_time_deviation_ms);
  /* 6) Compute the deviation between this sample and the delay expected by the
     Kalman filter (a measure of the network noise):
       frameDelayMS - (_theta[0] * deltaFSBytes + _theta[1])
     i.e. the current measurement minus the prediction from the previous filtered
     estimate, corresponding to equations 3.6.3 and 3.6.4. */
  double deviation = DeviationFromExpectedDelay(frameDelayMS, deltaFS);
  // Only update the Kalman filter if the sample is not considered an extreme
  // outlier. Even if it is an extreme outlier from a delay point of view, if
  // the frame size also is large the deviation is probably due to an incorrect
  // line slope.    
  // Only update the Kalman filter when the sample is not an extreme outlier, i.e.
  // the residual must stay below _numStdDevDelayOutlier * sqrt(_varNoise)
  // (30 ms with the default values; it scales with _varNoise).
  if (fabs(deviation) < _numStdDevDelayOutlier * sqrt(_varNoise) ||
      frameSizeBytes >
          _avgFrameSize + _numStdDevFrameSizeOutlier * sqrt(_varFrameSize)) {
    // Update the variance of the deviation from the line given by the Kalman
    // filter.
    EstimateRandomJitter(deviation, incompleteFrame);
    // Prevent updating with frames which have been congested by a large frame,
    // and therefore arrives almost at the same time as that frame.
    // This can occur when we receive a large frame (key frame) which has been
    // delayed. The next frame is of normal size (delta frame), and thus deltaFS
    // will be << 0. This removes all frame samples which arrives after a key
    // frame.
    if ((!incompleteFrame || deviation >= 0.0) &&
        static_cast<double>(deltaFS) > -0.25 * _maxFrameSize) {
      // Update the Kalman filter with the new data
      KalmanEstimateChannel(frameDelayMS, deltaFS);
    }
  } else {
    // The residual is too large, so the noise estimate itself is off: update the
    // measurement-noise statistics with a capped deviation and skip the Kalman
    // predict/correct step for this frame.
    int nStdDev =
        (deviation >= 0) ? _numStdDevDelayOutlier : -_numStdDevDelayOutlier;
    EstimateRandomJitter(nStdDev * sqrt(_varNoise), incompleteFrame);
  }
  // 7) Post-process the total estimated jitter to obtain the best jitterDelay
  //    estimate for this frame.
  if (_startupCount >= kStartupDelaySamples) {
    PostProcessEstimate();
  } else {
    _startupCount++;
  }
}
double VCMJitterEstimator::DeviationFromExpectedDelay(
    int64_t frameDelayMS,
    int32_t deltaFSBytes) const {
  return frameDelayMS - (_theta[0] * deltaFSBytes + _theta[1]); 
}
double avgFrameSize = _phi * _avgFrameSize + (1 - _phi) * frameSizeBytes;
_varFrameSize = VCM_MAX(
    _phi * _varFrameSize + (1 - _phi) * (frameSizeBytes - avgFrameSize) *
    (frameSizeBytes - avgFrameSize),1.0);
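
The effect of the _phi = 0.97 smoothing and the outlier guard can be seen with a few hypothetical frame sizes: delta frames near the average nudge _avgFrameSize slightly, while a single large key frame updates only the variance and leaves the average untouched. A small sketch (initial values assumed):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double phi = 0.97;  // _phi
  double avg = 5000.0;      // _avgFrameSize (assumed)
  double var = 40000.0;     // _varFrameSize (assumed, i.e. stddev of 200 bytes)
  double frames[] = {5200, 4900, 30000 /* key frame */, 5100};

  for (double fs : frames) {
    double newAvg = phi * avg + (1 - phi) * fs;
    // Only accept the new average when the frame is not a size outlier
    // (more than 2 standard deviations above the current average).
    if (fs < avg + 2 * std::sqrt(var)) {
      avg = newAvg;
    }
    // The variance is always updated, so persistently large frames are captured.
    var = std::max(phi * var + (1 - phi) * (fs - newAvg) * (fs - newAvg), 1.0);
    printf("frame %.0f B -> avg %.1f B, var %.1f\n", fs, avg, var);
  }
  return 0;
}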

4.3) Updating the Noise Mean and Variance

void VCMJitterEstimator::EstimateRandomJitter(double d_dT,
                                              bool incompleteFrame) {
  uint64_t now = clock_->TimeInMicroseconds();
  // 1) Sample the interval since the last update for the frame-rate statistics.
  if (_lastUpdateT != -1) {
    fps_counter_.AddSample(now - _lastUpdateT);
  }
  _lastUpdateT = now;
  if (_alphaCount == 0) {
    assert(false);
    return;
  }
  // 2) alpha = (_alphaCount - 1) / _alphaCount, approaching 399/400.
  double alpha =
      static_cast<double>(_alphaCount - 1) / static_cast<double>(_alphaCount);
  _alphaCount++;
  if (_alphaCount > _alphaCountMax)
    _alphaCount = _alphaCountMax;//_alphaCountMax = 400
  // In order to avoid a low frame rate stream to react slower to changes,
  // scale the alpha weight relative a 30 fps stream.
  double fps = GetFrameRate();
  if (fps > 0.0) {
    double rate_scale = 30.0 / fps;
    // At startup, there can be a lot of noise in the fps estimate.
    // Interpolate rate_scale linearly, from 1.0 at sample #1, to 30.0 / fps
    // at sample #kStartupDelaySamples.
    if (_alphaCount < kStartupDelaySamples) {
      rate_scale =
          (_alphaCount * rate_scale + (kStartupDelaySamples - _alphaCount)) /
          kStartupDelaySamples;//kStartupDelaySamples = 30
    }
    // alpha = pow((_alphaCount - 1) / _alphaCount, 30.0 / fps), e.g. pow(399/400, 30/fps) in steady state.
    alpha = pow(alpha, rate_scale);
  }
  double avgNoise = alpha * _avgNoise + (1 - alpha) * d_dT;
  double varNoise =
      alpha * _varNoise + (1 - alpha) * (d_dT - _avgNoise) * (d_dT - _avgNoise);
  if (!incompleteFrame || varNoise > _varNoise) {
    _avgNoise = avgNoise;
    _varNoise = varNoise;
  }
  if (_varNoise < 1.0) {
    // The variance should never be zero, since we might get stuck and consider
    // all samples as outliers.
    _varNoise = 1.0;
  }
}
double avgNoise = alpha * _avgNoise + (1 - alpha) * d_dT;
double varNoise =
      alpha * _varNoise + (1 - alpha) * (d_dT - _avgNoise) * (d_dT - _avgNoise);
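
The frame-rate scaling of alpha matters because a low-rate stream delivers fewer samples per second; raising alpha to the power 30/fps keeps the effective averaging window roughly constant in wall-clock time rather than in frames. A small sketch (sample values assumed) of how alpha and the noise statistics evolve at different frame rates:

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const int alphaCountMax = 400;   // _alphaCountMax
  int alphaCount = alphaCountMax;  // steady state, so the base alpha is 399/400
  double avgNoise = 0.0;           // _avgNoise
  double varNoise = 4.0;           // _varNoise (default start value)
  double deviations[] = {1.5, -0.8, 2.2, 6.0};  // d_dT samples in ms (assumed)

  double rates[] = {30.0, 10.0};
  for (double fps : rates) {
    double alpha = static_cast<double>(alphaCount - 1) / alphaCount;
    alpha = std::pow(alpha, 30.0 / fps);  // same averaging window in wall-clock time
    double a = avgNoise, v = varNoise;
    for (double d : deviations) {
      double newAvg = alpha * a + (1 - alpha) * d;
      // Variance is updated with the old mean, as in EstimateRandomJitter().
      v = std::max(alpha * v + (1 - alpha) * (d - a) * (d - a), 1.0);
      a = newAvg;
    }
    printf("fps %.0f: alpha %.5f, avgNoise %.3f, varNoise %.3f\n", fps, alpha, a, v);
  }
  return 0;
}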

4.4) Kalman Filter Prediction and Correction

double _thetaCov[2][2];  // Estimate covariance (prior estimation error covariance).
double _Qcov[2][2];      // Process noise covariance (the Q matrix).
/**
 * @param frameDelayMS  Measured inter-frame jitter delay between the current and previous frame.
 * @param deltaFSBytes  Size difference (in bytes) between the current and previous frame.
 */
void VCMJitterEstimator::KalmanEstimateChannel(int64_t frameDelayMS,
                                                int32_t deltaFSBytes) {
  double Mh[2];  // M * h', where M = _thetaCov and h = [deltaFSBytes, 1]
  double hMh_sigma;
  double kalmanGain[2];
  double measureRes;
  double t00, t01;
  // Kalman filtering
  /* 1) Compute the prior estimation error covariance (cf. equations 3.4.1-3.4.4):
     e^-(i) = theta(i) - theta_hat^-(i)
     P^-(i) = E{e^-(i) * e^-(i)^T}
            = E{(theta(i) - theta_hat^-(i)) * (theta(i) - theta_hat^-(i))^T}
            = A * P(i-1) * A^T + Q
            = P(i-1) + Q
            = E(i-1) + Q
     Prior error covariance of frame i = error covariance of frame i-1
                                         + process noise covariance.
  */
  //Prediction
  //M = M + Q = E(i-1) + Q     
  _thetaCov[0][0] += _Qcov[0][0];
  _thetaCov[0][1] += _Qcov[0][1];
  _thetaCov[1][0] += _Qcov[1][0];
  _thetaCov[1][1] += _Qcov[1][1];
  /*  
      2) Correction: compute the Kalman gain per equations 3.5.1-3.5.2.
                        P^-(i) * H^T
      k_bar(i) = ------------------------------------------------------
                    H * P^-(i) * H^T + R                   
                        P^-(i) * h_bar(i)
              = ------------------------------------------------------              (3.5.1)
                    h_bar(i)^T * P^-(i) * h_bar(i) + R                   
                      ( E(i-1) + Q(i) ) * h_bar(i)
              = ------------------------------------------------------              (3.5.2)
                var_v_hat(i) + h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i)    
  */
  // Kalman gain
  // K = M*h'/(sigma2n + h*M*h') = M*h'/(1 + h*M*h') = M*h'/(var_v_hat(i) + h*M*h')
  // h = [dFS 1], where dFS corresponds to the argument deltaFSBytes
  // Mh = M*h' = _thetaCov * [dFS 1]^T
  // hMh_sigma = h*M*h' + R = h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i) + R   
  Mh[0] = _thetaCov[0][0] * deltaFSBytes + _thetaCov[0][1];  // row for 1/C(i), the channel-rate estimate
  Mh[1] = _thetaCov[1][0] * deltaFSBytes + _thetaCov[1][1];  // row for m(i), the queuing-delay estimate
  // sigma weights measurements with a small deltaFS as noisy and
  // measurements with large deltaFS as good
  if (_maxFrameSize < 1.0) {
    return;
  }
  // sigma is an exponentially filtered estimate of the measurement noise standard
  // deviation; it plays the role of the measurement noise covariance R.
  double sigma = (300.0 * exp(-fabs(static_cast<double>(deltaFSBytes)) /
                              (1e0 * _maxFrameSize)) +
                  1) *
                  sqrt(_varNoise);
  if (sigma < 1.0) {
    sigma = 1.0;
  }
  // hMh_sigma corresponds to H * P^-(i) * H^T + R
  //   = h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i) + R, the denominator of (3.5.1):
  // [dFS 1] * Mh = dFS * Mh[0] + Mh[1]
  hMh_sigma = deltaFSBytes * Mh[0] + Mh[1] + sigma;
  if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) ||
      (hMh_sigma > -1e-9 && hMh_sigma <= 0)) {
    assert(false);
    return;
  }
  // Kalman gain: Mh / hMh_sigma.
  kalmanGain[0] = Mh[0] / hMh_sigma;
  kalmanGain[1] = Mh[1] / hMh_sigma;
  /*
  3) Correction: compute the posterior estimate per equations 3.6.1-3.6.4.
    theta_hat(i) = theta_hat^-(i) + k_bar(i) * (d(i) - H * theta_hat^-(i))
                  = theta_hat(i-1) + k_bar(i) * (d(i) - H * theta_hat(i-1))              (3.6.1)
                  = theta_hat(i-1) + k_bar(i) * d(i) - k_bar(i) * H * theta_hat(i-1)     
                  = (1 - k_bar(i) * H) * theta_hat(i-1)  + k_bar(i) * d(i)               (3.6.2)
    where k_bar(i) ∈ [0, 1/H]
    z(i) = d(i) - h_bar(i)^T * theta_hat(i-1)                                           (3.6.3)
    theta_hat(i) = theta_hat(i-1) + z(i) * k_bar(i)                                     (3.6.4)  
  */  
  // Correction
  // theta = theta + K*(dT - h*theta)
  // Compute the measurement residual and correct to obtain the new best estimate.
  measureRes = frameDelayMS - (deltaFSBytes * _theta[0] + _theta[1]);
  _theta[0] += kalmanGain[0] * measureRes;  // equation (3.6.4)
  _theta[1] += kalmanGain[1] * measureRes;  // equation (3.6.4)
  if (_theta[0] < _thetaLow) {
    _theta[0] = _thetaLow;
  }
  /**
  4) Update the error covariance per equations 3.7.1-3.7.5, providing the filter
     state for the next prediction.
    e(i) = theta(i) - theta_hat(i)                      P(E(i)) ~ (0 , P)               (3.7.1)
    P(i) = E{e(i) * e(i)^T}                                                             (3.7.2)
          = E{(theta(i) - theta_hat(i)) * (theta(i) - theta_hat(i))^T}                   (3.7.3)
          = (I - k_bar(i) * H) * P^-(i)                                                  (3.7.4)
          = (I - k_bar(i) * h_bar(i)^T) * (E(i-1) + Q(i))                                (3.7.5)
          = E(i)  
  */
  // M = (I - K*h)*M
  t00 = _thetaCov[0][0];
  t01 = _thetaCov[0][1];
  _thetaCov[0][0] = (1 - kalmanGain[0] * deltaFSBytes) * t00 -
                    kalmanGain[0] * _thetaCov[1][0];
  _thetaCov[0][1] = (1 - kalmanGain[0] * deltaFSBytes) * t01 -
                    kalmanGain[0] * _thetaCov[1][1];
  _thetaCov[1][0] = _thetaCov[1][0] * (1 - kalmanGain[1]) -
                    kalmanGain[1] * deltaFSBytes * t00;
  _thetaCov[1][1] = _thetaCov[1][1] * (1 - kalmanGain[1]) -
                    kalmanGain[1] * deltaFSBytes * t01;
  // Covariance matrix, must be positive semi-definite.
  assert(_thetaCov[0][0] + _thetaCov[1][1] >= 0 &&
          _thetaCov[0][0] * _thetaCov[1][1] -
                  _thetaCov[0][1] * _thetaCov[1][0] >=
              0 &&
          _thetaCov[0][0] >= 0);
}
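
The sigma term deliberately weights measurements by the frame-size delta: when |deltaFSBytes| is small relative to _maxFrameSize the exponential stays close to 1 and sigma (the effective measurement noise R) is large, so the sample barely moves the filter; when the size delta is comparable to _maxFrameSize, sigma drops to roughly a third of that and the sample is trusted more. A small sketch (state values assumed):

#include <cmath>
#include <cstdio>

int main() {
  // Assumed estimator state: a 25 kB max frame size and the default noise variance.
  const double maxFrameSize = 25000.0;  // _maxFrameSize
  const double varNoise = 4.0;          // _varNoise

  double deltas[] = {100.0, 1000.0, 5000.0, 25000.0};  // |deltaFSBytes|
  for (double dFS : deltas) {
    // Same expression as in KalmanEstimateChannel(): small size deltas are
    // treated as noisy measurements (large R), large deltas as informative.
    double sigma = (300.0 * std::exp(-std::fabs(dFS) / maxFrameSize) + 1.0) *
                   std::sqrt(varNoise);
    printf("deltaFS %6.0f B -> sigma (R) = %7.1f\n", dFS, sigma);
  }
  return 0;
}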

double noiseThreshold = _noiseStdDevs * sqrt(_varNoise) - _noiseStdDevOffset;
jitterDelay = _theta[0] * (_maxFrameSize - _avgFrameSize) + NoiseThreshold();

4.5) WebRTC JitterDelay Test Data

Summary

References