
//: C04:HTMLStripper2.cpp
//{L} ../C03/ReplaceAll
// Filter that strips HTML tags
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include "../C03/ReplaceAll.h"
#include "../require.h"
using namespace std;

string& stripHTMLTags(string& s) throw(runtime_error) {
  size_t leftPos;
  while ((leftPos = s.find('<')) != string::npos) {
    size_t rightPos = s.find('>', leftPos+1);
    if (rightPos == string::npos) {
      ostringstream msg;
      msg << "Niepeny znacznik pocztkowy HTML na pozycji "
          << leftPos;
      throw runtime_error(msg.str());
    }
    s.erase(leftPos, rightPos - leftPos + 1);
  }
  // Usuwaj wszystkie znaki specjalne HTML
  replaceAll(s, "&lt;", "<");
  replaceAll(s, "&gt;", ">");
  replaceAll(s, "&amp;", "&");
  replaceAll(s, "&nbsp;", " ");
  // Itd...
  return s;
}

// Program entry point: reads the file named by argv[1], strips its HTML
// markup with stripHTMLTags and writes the result to standard output.
//
// Returns EXIT_SUCCESS after the filtered text has been written, or
// EXIT_FAILURE when stripHTMLTags reports a std::runtime_error.
int main(int argc, char* argv[]) {
  // NOTE(review): requireArgs and assure come from ../require.h; presumably
  // they check the argument count and that the file opened - confirm there.
  requireArgs(argc, 1,
    "uycie: HTMLStripper2 PlikWejciowy");
  std::ifstream in(argv[1]);
  assure(in, argv[1]);
  // Read the whole file into a string, then strip it
  std::ostringstream ss;
  ss << in.rdbuf();
  try {
    std::string s = ss.str();
    std::cout << stripHTMLTags(s) << std::endl;
    return EXIT_SUCCESS;
  }
  catch (const std::runtime_error& x) {
    // Diagnostics go to stderr so they never mix with the filtered text
    // when standard output is redirected or piped.
    std::cerr << x.what() << std::endl;
    return EXIT_FAILURE;
  }
} ///:~
