
'UTF-8 Encoding Algorithm' code example was used in W3C SPARQL JSON RegEx Parser code in 2011.

Subject: regexParser.cpp
Date: Jul, 2011
Author: ericP

A boost regex library approach to parsing JSON-format SPARQL results.

(Quick and dirty syntax colorizing with emacs htmlize-buffer.)

/** JSONparser.cpp - boost::regex parser for the SPARQL JSON results
 * input: strings defined by the SPARQL JSON results format.
 * requires:
 *   boost::regex - regular expression library.
 *   boost::optional - optionally initialized container, like Haskell's Maybe.
 * author: ericP, Eric Prud'hommeaux.
 * license: Apache License, Version 2.0, free like speech.
 */

/** ToUTF8 - convert ordinal references to UTF-8 sequences.
 * Out - an output sink that accepts one byte per assignment (`out = byte`),
 *       e.g. std::back_insert_iterator<std::string> or
 *       std::ostream_iterator<char>; something with a similar incremental
 *       interface works too.
 */
struct ToUTF8 {

    /** Thrown when an ordinal lies outside the encodable range
     *  [0x0, 0x10FFFF].
     */
    struct InvalidUCScode : public std::runtime_error {
        // NOTE(review): minimal constructor; the throw sites only require
        // that the exception be constructible from the offending ordinal.
        explicit InvalidUCScode (unsigned long ord)
            : std::runtime_error("invalid UCS code point"), ordinal(ord) {}

        /** The rejected value, as passed by the thrower. */
        unsigned long ordinal;
    };

    /** Encode the code point written as a hexadecimal string.
     *
     * @param s    NUL-terminated hexadecimal digits (e.g. "20AC"); must not
     *             be null.
     * @param out  sink receiving the UTF-8 bytes.
     * @return     the sink after the bytes have been written.
     * @throws InvalidUCScode if the parsed value is negative or above
     *         0x10FFFF.
     */
    template<typename Out>
    Out operator() (const char* s, Out out) const {
        const long parsed = strtol(s, nullptr, 16);
        // Range-check while still a long: narrowing first would silently
        // wrap values wider than unsigned int (e.g. 0x100000041 -> 0x41).
        if (parsed < 0 || parsed > 0x10FFFFL) {
            throw InvalidUCScode(static_cast<unsigned long>(parsed));
        }
        return operator()(static_cast<unsigned int>(parsed), out);
    }

    /** Encode a single code point as 1 to 4 UTF-8 bytes.
     *
     * @param ord  the code point to encode.
     * @param out  sink receiving the UTF-8 bytes, one assignment per byte.
     * @return     the sink after the bytes have been written.
     * @throws InvalidUCScode if ord >= 0x110000.
     *
     * NOTE(review): values in the surrogate range 0xD800-0xDFFF are not
     * rejected; they are emitted as ordinary 3-byte sequences.
     */
    template<typename Out>
    Out operator() (unsigned int ord, Out out) const {
        if (ord < 0x80) {
            // 0xxxxxxx
            out = (ord & 0x7F);
        } else if (ord < 0x0800) {
            // 110xxxxx 10xxxxxx
            out = ((ord >> 6) & 0x1F) | 0xC0;
            out = (ord & 0x3F) | 0x80;
        } else if (ord < 0x010000) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            out = ((ord >> 12) & 0x0F) | 0xE0;
            out = ((ord >> 6) & 0x3F) | 0x80;
            out = (ord & 0x3F) | 0x80;
        } else if (ord < 0x110000) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            out = ((ord >> 18) & 0x07) | 0xF0;
            out = ((ord >> 12) & 0x3F) | 0x80;
            out = ((ord >> 6) & 0x3F) | 0x80;
            out = (ord & 0x3F) | 0x80;
        } else {
            throw InvalidUCScode(ord);
        }
        return out;
    }
};

