// CkHtmlToXmlW.h: interface for the CkHtmlToXmlW class.
//
//////////////////////////////////////////////////////////////////////

// This header is generated for Chilkat 11.3.0
#ifndef _CkHtmlToXmlW_H
#define _CkHtmlToXmlW_H

#include "chilkatDefs.h"
#include "CkString.h"
#include "CkWideCharBase.h"

// Forward declarations: these types appear only as reference parameters in
// the class declared in this header, so their full definitions are not
// needed here.
class CkByteData;
class CkBinDataW;
class CkStringBuilderW;

// Push a packing alignment of 8 for the declarations that follow. The pragma
// is skipped when __sun__ or __sun is defined (presumably because the Sun
// toolchain does not accept this form -- confirm before changing).
#if !defined(__sun__) && !defined(__sun)
#pragma pack (push, 8)
#endif

// CLASS: CkHtmlToXmlW
|
|
class CK_VISIBLE_PUBLIC CkHtmlToXmlW : public CkWideCharBase
|
|
{
|
|
|
|
|
|
private:
|
|
|
|
// Don't allow assignment or copying these objects.
|
|
CkHtmlToXmlW(const CkHtmlToXmlW &);
|
|
CkHtmlToXmlW &operator=(const CkHtmlToXmlW &);
|
|
|
|
public:
|
|
CkHtmlToXmlW(void);
|
|
virtual ~CkHtmlToXmlW(void);
|
|
|
|
|
|
|
|
static CkHtmlToXmlW *createNew(void);
|
|
|
|
|
|
|
|
void CK_VISIBLE_PRIVATE inject(void *impl);
|
|
|
|
// May be called when finished with the object to free/dispose of any
|
|
// internal resources held by the object.
|
|
void dispose(void);
|
|
|
|
|
|
|
|
// BEGIN PUBLIC INTERFACE
|
|
|
|
// ----------------------
|
|
// Properties
|
|
// ----------------------
|
|
// If set to true, then any non-standard HTML tags will be dropped when converting
|
|
// to XML.
|
|
bool get_DropCustomTags(void);
|
|
// If set to true, then any non-standard HTML tags will be dropped when converting
|
|
// to XML.
|
|
void put_DropCustomTags(bool newVal);
|
|
|
|
// The HTML to be converted by the ToXml method. To convert HTML to XML, first set
|
|
// this property to the HTML string and then call ToXml. The ConvertFile method can
|
|
// do file-to-file conversions.
|
|
void get_Html(CkString &str);
|
|
// The HTML to be converted by the ToXml method. To convert HTML to XML, first set
|
|
// this property to the HTML string and then call ToXml. The ConvertFile method can
|
|
// do file-to-file conversions.
|
|
const wchar_t *html(void);
|
|
// The HTML to be converted by the ToXml method. To convert HTML to XML, first set
|
|
// this property to the HTML string and then call ToXml. The ConvertFile method can
|
|
// do file-to-file conversions.
|
|
void put_Html(const wchar_t *newVal);
|
|
|
|
// Determines how to handle HTML entities. The default value, 0 will cause
|
|
// _AMP_nbsp; entites to be convert to normal space characters (ASCII value 32). If
|
|
// this property is set to 1, then _AMP_nbsp;'s will be converted to _AMP_#160. If
|
|
// set to 2, then _AMP_nbps;'s are dropped. If set to 3, then _AMP_nbsp's are left
|
|
// unmodified.
|
|
int get_Nbsp(void);
|
|
// Determines how to handle HTML entities. The default value, 0 will cause
|
|
// _AMP_nbsp; entites to be convert to normal space characters (ASCII value 32). If
|
|
// this property is set to 1, then _AMP_nbsp;'s will be converted to _AMP_#160. If
|
|
// set to 2, then _AMP_nbps;'s are dropped. If set to 3, then _AMP_nbsp's are left
|
|
// unmodified.
|
|
void put_Nbsp(int newVal);
|
|
|
|
// The charset, such as utf-8 or iso-8859-1 of the XML to be created. If XmlCharset
|
|
// is empty, the XML is created in the same character encoding as the HTML.
|
|
// Otherwise the HTML is converted XML and converted to this charset.
|
|
void get_XmlCharset(CkString &str);
|
|
// The charset, such as utf-8 or iso-8859-1 of the XML to be created. If XmlCharset
|
|
// is empty, the XML is created in the same character encoding as the HTML.
|
|
// Otherwise the HTML is converted XML and converted to this charset.
|
|
const wchar_t *xmlCharset(void);
|
|
// The charset, such as utf-8 or iso-8859-1 of the XML to be created. If XmlCharset
|
|
// is empty, the XML is created in the same character encoding as the HTML.
|
|
// Otherwise the HTML is converted XML and converted to this charset.
|
|
void put_XmlCharset(const wchar_t *newVal);
|
|
|
|
|
|
|
|
// ----------------------
|
|
// Methods
|
|
// ----------------------
|
|
// Converts an HTML file to a well-formed XML file that can be parsed for the
|
|
// purpose of programmatically extracting information.
|
|
bool ConvertFile(const wchar_t *inHtmlPath, const wchar_t *destXmlPath);
|
|
|
|
// Allows for any specified tag to be dropped from the output XML. To drop more
|
|
// than one tag, call this method once for each tag type to be dropped.
|
|
void DropTagType(const wchar_t *tagName);
|
|
|
|
// Causes text formatting tags to be dropped from the XML output. Text formatting
|
|
// tags are: b, font, i, u, br, center, em, strong, big, tt, s, small, strike, sub,
|
|
// and sup.
|
|
void DropTextFormattingTags(void);
|
|
|
|
// Convenience method for reading a complete file into a byte array.
|
|
bool ReadFile(const wchar_t *path, CkByteData &outBytes);
|
|
|
|
// Convenience method for reading a text file into a string. The character encoding
|
|
// of the text file is specified by srcCharset. Valid values, such as iso-8895-1 or utf-8
|
|
// are listed at: List of Charsets.
|
|
//
|
|
// References:
|
|
// 1: https://www.chilkatsoft.com/charsets_supported_by_chilkat.asp
|
|
bool ReadFileToString(const wchar_t *filename, const wchar_t *srcCharset, CkString &outStr);
|
|
// Convenience method for reading a text file into a string. The character encoding
|
|
// of the text file is specified by srcCharset. Valid values, such as iso-8895-1 or utf-8
|
|
// are listed at: List of Charsets.
|
|
//
|
|
// References:
|
|
// 1: https://www.chilkatsoft.com/charsets_supported_by_chilkat.asp
|
|
const wchar_t *readFileToString(const wchar_t *filename, const wchar_t *srcCharset);
|
|
|
|
// Sets the Html property from the contents of bd.
|
|
bool SetHtmlBd(CkBinDataW &bd);
|
|
|
|
// Sets the Html property from a byte array.
|
|
void SetHtmlBytes(CkByteData &inData);
|
|
|
|
// Sets the Html property by loading the HTML from a file.
|
|
bool SetHtmlFromFile(const wchar_t *filename);
|
|
|
|
// Sets the Html property from the contents of sb.
|
|
bool SetHtmlSb(CkStringBuilderW &sb);
|
|
|
|
// Converts the HTML in the Html property to XML and returns the XML string.
|
|
bool ToXml(CkString &outStr);
|
|
// Converts the HTML in the Html property to XML and returns the XML string.
|
|
const wchar_t *toXml(void);
|
|
|
|
// Converts the HTML in the Html property to XML and appends the XML to sb.
|
|
bool ToXmlSb(CkStringBuilderW &sb);
|
|
|
|
// Causes a specified type of tag to NOT be dropped in the output XML.
|
|
void UndropTagType(const wchar_t *tagName);
|
|
|
|
// Causes text formatting tags to NOT be dropped from the XML output. Text
|
|
// formatting tags are: b, font, i, u, br, center, em, strong, big, tt, s, small,
|
|
// strike, sub, and sup.
|
|
//
|
|
// Important: Text formatting tags are dropped by default. Call this method to
|
|
// prevent text formatting tags from being dropped.
|
|
//
|
|
void UndropTextFormattingTags(void);
|
|
|
|
// Convenience method for saving a byte array to a file.
|
|
bool WriteFile(const wchar_t *path, CkByteData &fileData);
|
|
|
|
// Convenience method for saving a string to a file. The character encoding of the
|
|
// output text file is specified by charset (the string is converted to this charset
|
|
// when writing). Valid values, such as iso-8895-1 or utf-8 are listed at: List of
|
|
// Charsets.
|
|
//
|
|
// References:
|
|
// 1: https://www.chilkatsoft.com/charsets_supported_by_chilkat.asp
|
|
bool WriteStringToFile(const wchar_t *stringToWrite, const wchar_t *filename, const wchar_t *charset);
|
|
|
|
// This is the same as the ToXml method. It converts the HTML in the Html property
|
|
// to XML and returns the XML string.
|
|
bool Xml(CkString &outStr);
|
|
// This is the same as the ToXml method. It converts the HTML in the Html property
|
|
// to XML and returns the XML string.
|
|
const wchar_t *xml(void);
|
|
|
|
|
|
|
|
|
|
|
|
// END PUBLIC INTERFACE
|
|
|
|
|
|
};
|
|
// Restore the packing alignment that was in effect before the
// "#pragma pack (push, 8)" earlier in this header (same __sun__/__sun
// condition as the push, so the two stay balanced).
#if !defined(__sun__) && !defined(__sun)
#pragma pack (pop)
#endif

#endif