https://github.com/grossherr/mdb-parser
I played with this in 2015.
mdb-parser.php
<?php
/**
* MdB Parser
*/
/**
* Variables, Functions
*/
// Optional test mode: `php mdb-parser.php test` hands control to tests.php.
// The argument is optional (general-variables.php falls back to 'spd' when it
// is missing), so its presence must be checked before it is compared.
if ( isset( $argv[1] ) && $argv[1] == 'test' ) {
    include 'tests.php';
}
// Function libraries first, then the variable files. general-variables.php
// defines $data and $vz, which are used below.
include 'general-functions.php';
include 'tmp-functions.php';
include 'general-variables.php';
include 'variable-profil-url-daten.php';
setup_current_arr_ele( $vz, $data );
currently_processing_msg( $data );
// Dispatch to the parser script that belongs to the selected directory key.
switch ( $vz ) {
    case 'bundestag':
        include 'parser-bundestag.php';
        break;
    case 'spd':
        include 'parser-spd.php';
        break;
    case 'gruene':
        include 'parser-gruene.php';
        break;
    case 'linke':
        include 'parser-linke.php';
        break;
    case 'cdu':
        include 'parser-cdu.php';
        break;
    case 'all':
        // Running every parser in one go is currently disabled.
        //include 'script/parser-bundestag.php';
        //include 'script/parser-spd.php';
        //include 'script/parser-gruene.php';
        break;
    default:
        echo PHP_EOL . 'no match' . PHP_EOL;
        break;
}
//print_r($data);
general-functions.php
<?php
/**
 * Registers one directory listing, or all of them, in the data container.
 *
 * Terminates the script with "no list" when every listing is requested but
 * $abgvzs is empty.
 *
 * @param array  $abgvzs Map of directory key => listing URL.
 * @param array  $data   Data container; filled in place through vz_setup().
 * @param string $vz_in  Directory key to set up, or 'all' for every entry.
 * @return array The updated data container.
 */
function data_setup( $abgvzs, &$data, $vz_in = 'all' ) {
    $total = count( $abgvzs );

    // A single, explicitly requested directory.
    if ( $vz_in != 'all' ) {
        vz_setup( $vz_in, $abgvzs[ $vz_in ], $data );
        return $data;
    }

    if ( $total == 0 ) {
        die( 'no list' );
    }

    foreach ( $abgvzs as $list_key => $list_url ) {
        vz_setup( $list_key, $list_url, $data );
    }

    return $data;
}
/**
 * Loads one directory listing page and stores it under $data['lists'][$vz].
 *
 * The page is cached in tmp/<vz>/src/abgeordnetenliste; the URL is only
 * fetched when that cache file does not exist yet. The script is terminated
 * when the download fails, so that an empty file is never written to the
 * cache and then reused on every later run.
 *
 * @param string $vz   Directory key, also used as the cache sub-directory name.
 * @param string $url  URL of the listing page.
 * @param array  $data Data container; the entry for $vz is (re)written in place.
 * @return array The updated data container.
 */
function vz_setup( $vz, $url, &$data ) {
    $data[ 'lists' ][ $vz ] = array();
    $baseurl = return_base_url( $url );
    $src_path = 'tmp/' . $vz . '/src/';
    $src_file = $src_path . 'abgeordnetenliste';

    if ( ! file_exists( $src_file ) ) {
        $downloaded = file_get_contents( $url );
        if ( $downloaded === false ) {
            die( 'download failed: ' . $url );
        }
        // Make sure the cache directory exists before writing into it.
        if ( ! is_dir( $src_path ) ) {
            mkdir( $src_path, 0777, true );
        }
        file_put_contents( $src_file, $downloaded );
    }

    // Always read from the cache so that cached and fresh runs take the same path.
    $src = file_get_contents( $src_file );
    $domdoc = initialize_domdoc( $src );
    // The page <title> serves as the human-readable name of the listing.
    $name = domdoc_unique_element_by_tag( $domdoc, 'title' );

    $data[ 'lists' ][ $vz ] = array(
        'name' => $name,
        'url' => $url,
        'baseurl' => $baseurl,
        'src' => $src,
        'domdoc' => $domdoc,
    );

    return $data;
}
/**
 * Makes the listing stored for $vz the "current" one.
 *
 * $data['current'] becomes the listing entry from $data['lists'][$vz] with an
 * additional leading 'vz' element that holds the directory key.
 *
 * @param string $vz   Directory key to select.
 * @param array  $data Data container, modified in place.
 */
function setup_current_arr_ele( $vz, &$data ) {
    $data[ 'current' ] = array();
    $data[ 'current' ] = array_merge(
        array( 'vz' => $vz ),
        $data[ 'lists' ][ $vz ]
    );
}
/**
 * Prints a short banner naming the listing that is about to be parsed.
 *
 * @param array $data Data container; reads 'vz', 'name' and 'url' from $data['current'].
 */
function currently_processing_msg( $data ) {
    $current = $data[ 'current' ];
    // The empty first element yields the leading blank line; the two empty
    // trailing elements yield the final line break plus one blank line.
    $lines = array(
        '',
        'Attempting to parse »' . $current[ 'vz' ] . '«:',
        '→ ' . $current[ 'name' ],
        '→ ' . $current[ 'url' ],
        '',
        '',
    );
    echo implode( PHP_EOL, $lines );
}
/**
 * Re-indexes an array and all arrays nested inside it with consecutive
 * integer keys starting at 0.
 *
 * @param array $array Array to re-index.
 * @return array Copy of the input in which every level is a plain list.
 */
function array_values_recursive( array $array ) {
    return array_map(
        function ( $item ) {
            return is_array( $item ) ? array_values_recursive( $item ) : $item;
        },
        array_values( $array )
    );
}
/**
 * Parses an HTML string into a DOMDocument.
 *
 * @param string $src     HTML source.
 * @param bool   $err_msg When false (default) libxml parse errors are collected
 *                        internally instead of being emitted as PHP warnings;
 *                        when true they are emitted.
 * @return DOMDocument The parsed document.
 */
function initialize_domdoc( $src, $err_msg = false ) {
    $domdoc = new DOMDocument();
    // Set the libxml flag on every call: it is process-wide, so a previous
    // call with $err_msg = false would otherwise keep messages suppressed
    // even when they are explicitly requested here.
    libxml_use_internal_errors( ! $err_msg );
    $domdoc->loadHTML( $src );
    // Collected errors are never inspected; drop them so the buffer does not
    // keep growing while many pages are parsed in one run.
    libxml_clear_errors();
    return $domdoc;
}
/**
 * Returns the text content of an element looked up by tag name.
 *
 * Despite the name, uniqueness is not enforced: the script terminates with
 * "not unique" only when no element with that tag exists, and when several
 * exist the value of the last one is returned.
 *
 * @param DOMDocument $domdoc  Document to search.
 * @param string      $element Tag name.
 * @return string nodeValue of the last matching element.
 */
function domdoc_unique_element_by_tag( $domdoc, $element ) {
    $matches = $domdoc->getElementsByTagName( $element );
    $total = $matches->length;

    if ( $total == 0 ) {
        die( 'not unique' );
    }

    return $matches->item( $total - 1 )->nodeValue;
}
/**
 * Converts a DOM node into a nested array.
 *
 * Attributes become 'name' => 'value' entries. A node with exactly one child
 * stores that child's nodeValue under the child's nodeName (e.g. '#text').
 * A node with several children stores, per child node name, a list with the
 * recursive conversion of every non-text child.
 *
 * @param DOMNode $node Node to convert.
 * @return array|false The array form, or false when the node yields no entries.
 */
function domxpath_node_to_array( $node ) {
    // Collect into a real array: writing elements onto `false` (as the
    // result used to be initialised) is deprecated since PHP 8.1.
    $array = array();

    if ( $node->hasAttributes() ) {
        foreach ( $node->attributes as $attr ) {
            $array[ $attr->nodeName ] = $attr->nodeValue;
        }
    }

    if ( $node->hasChildNodes() ) {
        if ( $node->childNodes->length == 1 ) {
            $array[ $node->firstChild->nodeName ] = $node->firstChild->nodeValue;
        } else {
            foreach ( $node->childNodes as $childNode ) {
                if ( $childNode->nodeType != XML_TEXT_NODE ) {
                    $array[ $childNode->nodeName ][] = domxpath_node_to_array( $childNode );
                }
            }
        }
    }

    // Callers receive false, not an empty array, when nothing was collected.
    return empty( $array ) ? false : $array;
}
/**
 * Reads the text or one attribute of an element found by tag name.
 *
 * When several elements match, the last one determines the result.
 *
 * @param DOMDocument|DOMElement $node     Node to search below.
 * @param string                 $tag_name Tag name to look for.
 * @param string                 $type     Attribute to read; when empty the nodeValue is returned.
 * @return string|false The value, or false when no element matches.
 */
function get_element_by_tag_name_unique( $node, $tag_name, $type = '' ) {
    $found = $node->getElementsByTagName( $tag_name );

    if ( ! $found->length ) {
        return false;
    }

    $last = $found->item( $found->length - 1 );

    return empty( $type ) ? $last->nodeValue : $last->getAttribute( $type );
}
/**
 * Reads the text or one attribute of the n-th element with a given tag name.
 *
 * @param DOMDocument|DOMElement $node     Node to search below.
 * @param string                 $tag_name Tag name to look for.
 * @param int|string             $nr       Zero-based position among the matches.
 * @param string                 $type     Attribute to read; when empty the nodeValue is returned.
 * @return string|false The value, or false when there is no element at that position.
 */
function get_element_by_tag_name_item_nr( $node, $tag_name, $nr = '0', $type = '' ) {
    $element = $node->getElementsByTagName( $tag_name );
    $element_item = $element->item( (int) $nr );

    // item() returns null for a missing position. Test for that directly:
    // count() on a DOM node or null is a TypeError in PHP 8.
    if ( $element_item === null ) {
        return false;
    }

    if ( empty( $type ) ) {
        return $element_item->nodeValue;
    }

    return $element_item->getAttribute( $type );
}
/**
 * Trims every element of an array in place; keys are kept.
 *
 * @param array $array Array of strings, modified in place.
 */
function array_trim( &$array ) {
    $trimmed = array();
    foreach ( $array as $index => $text ) {
        $trimmed[ $index ] = trim( $text );
    }
    $array = $trimmed;
}
/**
 * Reduces a URL to "<scheme>://<host>".
 *
 * Port, path, query and fragment are dropped.
 *
 * @param string $url Absolute URL.
 * @return string Scheme and host of the URL.
 */
function return_base_url( $url ) {
    $parts = parse_url( $url );

    return $parts[ 'scheme' ] . '://' . $parts[ 'host' ];
}
/**
 * Strips a leading "mailto:" from a link target.
 *
 * @param string $href Value of an href attribute.
 * @return string The value without the "mailto:" prefix; unchanged when it has none.
 */
function mailto_remover( $href ) {
    return preg_replace( '/^mailto:/', '', $href );
}
general-variables.php
<?php
// Data container. The script arguments come first: the script name and the
// requested directory key, which defaults to 'spd' when none is passed.
$data = array(
    'args' => array(
        'script' => array(
            'name' => $argv[ 0 ],
            'vz' => isset( $argv[ 1 ] ) ? $argv[ 1 ] : 'spd',
        ),
    ),
);
// Directories of representatives: directory key => URL of the listing page.
$abg_vzs_std = array(
    'bundestag' => 'http://www.bundestag.de/bundestag/abgeordnete18/alphabet',
    'spd' => 'http://www.spdfraktion.de/abgeordnete/all?view=list',
    'gruene' => 'http://www.gruene-bundestag.de/fraktion/abgeordnete_ID_4389869.html',
    'linke' => 'http://www.linksfraktion.de/abgeordnete/',
    'cdu' => 'https://www.cducsu.de/abgeordnete',
);
$data[ 'args' ][ 'abgvzs' ] = $abg_vzs_std;
// Short-hand variables used by the including script.
$abgvzs = $abg_vzs_std;
$vz = $data[ 'args' ][ 'script' ][ 'vz' ];
// Load the requested listing(s) into $data['lists'].
data_setup( $abgvzs, $data, $vz );
tmp-functions.php
<?php
/**
 * Flattens a SimpleXMLElement tree into a map of path => text value.
 *
 * Every leaf is stored in $return under a path such as "/root/item[2]/name[1]",
 * where the bracketed number counts same-named siblings starting at 1.
 *
 * @param SimpleXMLElement|mixed $xml    Node to flatten; anything else is stored as a string leaf.
 * @param array                  $return Result map, filled in place.
 * @param string                 $path   Path prefix of the current node.
 * @param bool                   $root   When true, the node's own name is appended to the path first.
 */
function XMLToArrayFlat($xml, &$return, $path='', $root=false)
{
    $is_element = $xml instanceof SimpleXMLElement;
    $children = $is_element ? $xml->children() : array();

    if ($is_element && $root) {
        $path .= '/' . $xml->getName();
    }

    // Leaf: store the text content and stop descending.
    if (count($children) == 0) {
        $return[$path] = (string) $xml;
        return;
    }

    // Number same-named siblings so that every path stays unique.
    $occurrences = array();
    foreach ($children as $name => $node) {
        $occurrences[$name] = isset($occurrences[$name]) ? $occurrences[$name] + 1 : 1;
        XMLToArrayFlat($node, $return, $path . '/' . $name . '[' . $occurrences[$name] . ']');
    }
}
/**
 * XML2Array: A class to convert XML to array in PHP
 * It returns the array which can be converted back to XML using the Array2XML script
 * It takes an XML string or a DOMDocument object as an input.
 *
 * See Array2XML: http://www.lalit.org/lab/convert-php-array-to-xml-with-attributes
 *
 * Author : Lalit Patel
 * Website: http://www.lalit.org/lab/convert-xml-to-array-in-php-xml2array
 * License: Apache License 2.0
 * http://www.apache.org/licenses/LICENSE-2.0
 * Version: 0.1 (07 Dec 2011)
 * Version: 0.2 (04 Mar 2012)
 * Fixed typo 'DomDocument' to 'DOMDocument'
 *
 * Local changes: comments and processing instructions inside an element are
 * skipped instead of discarding the children collected before them, and any
 * DOMDocument instance (including subclasses) is accepted as input.
 *
 * Usage:
 * $array = XML2Array::createArray($xml);
 */
class XML2Array {
    private static $xml = null;
    private static $encoding = 'UTF-8';

    /**
     * Initialize the DOMDocument used for parsing XML strings [optional]
     * @param string $version
     * @param string $encoding
     * @param bool   $format_output
     */
    public static function init($version = '1.0', $encoding = 'UTF-8', $format_output = true) {
        self::$xml = new DOMDocument($version, $encoding);
        self::$xml->formatOutput = $format_output;
        self::$encoding = $encoding;
    }

    /**
     * Convert an XML to Array
     * @param string|DOMDocument $input_xml - XML as a string or as a DOMDocument
     * @return array - array with the root tag name as its only key
     * @throws Exception when the string cannot be parsed or the input has the wrong type
     */
    public static function &createArray($input_xml) {
        $xml = self::getXMLRoot();
        if (is_string($input_xml)) {
            $parsed = $xml->loadXML($input_xml);
            if (!$parsed) {
                throw new Exception('[XML2Array] Error parsing the XML string.');
            }
        } else {
            // instanceof also copes with non-objects, for which get_class() fails.
            if (!($input_xml instanceof DOMDocument)) {
                throw new Exception('[XML2Array] The input XML object should be of type: DOMDocument.');
            }
            $xml = self::$xml = $input_xml;
        }
        $array = array();
        $array[$xml->documentElement->tagName] = self::convert($xml->documentElement);
        self::$xml = null; // clear the xml node in the class for 2nd time use.
        return $array;
    }

    /**
     * Convert a DOM node (and its subtree) to its array representation
     * @param DOMNode $node - node to convert
     * @return mixed - string for text/leaf nodes, array otherwise
     */
    private static function &convert($node) {
        $output = array();
        switch ($node->nodeType) {
            case XML_CDATA_SECTION_NODE:
                $output['@cdata'] = trim($node->textContent);
                break;
            case XML_TEXT_NODE:
                $output = trim($node->textContent);
                break;
            case XML_ELEMENT_NODE:
                // for each child node, call the convert function recursively
                for ($i = 0, $m = $node->childNodes->length; $i < $m; $i++) {
                    $child = $node->childNodes->item($i);
                    // Comments and processing instructions carry no data. Without
                    // this guard they convert to an empty array that replaces
                    // everything collected from the preceding siblings.
                    if ($child->nodeType === XML_COMMENT_NODE || $child->nodeType === XML_PI_NODE) {
                        continue;
                    }
                    $v = self::convert($child);
                    if (isset($child->tagName)) {
                        $t = $child->tagName;
                        // assume more nodes of same kind are coming
                        if (!isset($output[$t])) {
                            $output[$t] = array();
                        }
                        $output[$t][] = $v;
                    } else {
                        // check if it is not an empty text node
                        if ($v !== '') {
                            $output = $v;
                        }
                    }
                }
                if (is_array($output)) {
                    // if only one node of its kind, assign it directly instead of array($value);
                    foreach ($output as $t => $v) {
                        if (is_array($v) && count($v) == 1) {
                            $output[$t] = $v[0];
                        }
                    }
                    if (empty($output)) {
                        // for empty nodes
                        $output = '';
                    }
                }
                // loop through the attributes and collect them
                if ($node->attributes->length) {
                    $a = array();
                    foreach ($node->attributes as $attrName => $attrNode) {
                        $a[$attrName] = (string) $attrNode->value;
                    }
                    // if it is a leaf node, store the value in @value instead of directly storing it.
                    if (!is_array($output)) {
                        $output = array('@value' => $output);
                    }
                    $output['@attributes'] = $a;
                }
                break;
        }
        return $output;
    }

    /*
     * Get the DOMDocument used for parsing; if there isn't one, create it.
     */
    private static function getXMLRoot(){
        if (empty(self::$xml)) {
            self::init();
        }
        return self::$xml;
    }
}
tests.php
<?php
// Smoke test for the profile data structure: instantiate ProfilURLDaten
// (presumably declared in variable-profil-url-daten.php - not visible here)
// and dump it.
include 'variable-profil-url-daten.php';
//file_put_contents( 'tmp/test/default_profil_url_arr_struc.php', var_export( $profil_url_daten, true ) );
$profil_url_daten_obj = new ProfilURLDaten();
//print_r( $profil_url_daten_obj );
$profil_url_daten_obj->pud_dump();
// The script ends here, so none of the top-level statements further down in
// this file are executed.
die;
/**
 * array_walk_recursive() callback that replaces a value with an empty string.
 *
 * Used to reduce a data set to its bare key structure.
 *
 * @param mixed      $v Current value, overwritten in place.
 * @param int|string $k Current key (unused).
 */
function af_cb_empty_all_array_values( &$v, $k ) {
    $v = '';
}
/**
 * Merges two arrays recursively without turning colliding scalars into lists.
 *
 * Unlike array_merge_recursive(), a value from $array2 replaces the value with
 * the same key in $array1; only when both values are arrays are they merged
 * recursively by the same rule.
 *
 * @param array $array1 Base array (not modified).
 * @param array $array2 Array whose values take precedence (not modified).
 * @return array The merged array.
 */
function array_merge_recursive_distinct ( array &$array1, array &$array2 ) {
    $result = $array1;

    foreach ( $array2 as $name => $incoming ) {
        $both_are_arrays = is_array( $incoming )
            && isset( $result[ $name ] )
            && is_array( $result[ $name ] );

        if ( $both_are_arrays ) {
            $result[ $name ] = array_merge_recursive_distinct( $result[ $name ], $incoming );
        } else {
            $result[ $name ] = $incoming;
        }
    }

    return $result;
}
/**
 * Folds a list of arrays into one array that contains the union of their keys.
 *
 * Starting with the first element, every following element is merged with the
 * result so far through array_merge_recursive_distinct(); on collisions the
 * value accumulated so far wins.
 *
 * An empty list yields an empty array and a single-element list yields that
 * element. Keys no longer have to be 0-based and consecutive.
 *
 * Side effect kept from the original implementation: elements that have been
 * folded in are removed from $array_to_analyse, so a list with more than one
 * element is left holding only its last element.
 *
 * @param array $array_to_analyse List of arrays; consumed in place (see above).
 * @return array The merged structure.
 */
function array_of_arrays_merge_recusrsive_distinct( &$array_to_analyse ) {
    $new_distinct_array = array();
    $last_key = null;
    $is_first = true;

    foreach ( $array_to_analyse as $key => $current_array ) {
        if ( $is_first ) {
            // The first element only seeds the accumulator.
            $new_distinct_array = $current_array;
            $is_first = false;
            $last_key = $key;
            continue;
        }

        // Separate copy: both parameters of the merge function are references.
        $merge_array = $new_distinct_array;
        $new_distinct_array = array_merge_recursive_distinct( $current_array, $merge_array );

        unset( $array_to_analyse[ $last_key ] );
        $last_key = $key;
    }

    return $new_distinct_array;
}
/**
 * Prunes the entries below
 * ['btpXMLData']['mdb']['mdbInfo']['mdbMitgliedschaften'] in place.
 *
 * For every non-empty group and every entry in it (except '@attributes'),
 * the entry's elements are filtered as described inline and the filtered
 * entry is written back. Nothing is returned.
 *
 * NOTE(review): the filters rely on loose comparisons between keys that may
 * be integers or strings; PHP 8 changed how numbers and non-numeric strings
 * compare, so the outcome may differ between PHP versions - confirm.
 *
 * @param array $bt_xml_array Structure to correct, modified in place.
 */
function bt_xml_distinct_array_mitgliedschaften_korrektor( &$bt_xml_array ) {
foreach (
$bt_xml_array
[ 'btpXMLData' ]
[ 'mdb' ]
[ 'mdbInfo' ]
[ 'mdbMitgliedschaften' ]
as $key => $value
) {
//print_r($key.PHP_EOL);
// Entry names whose non-integer keys are exempt from the first filter below.
$mdb_gremien_exep = array(
'mdbStellvVorsitzSonstigesGremium',
'mdbVorsitzSonstigesGremium'
);
// Nothing to prune in an empty group.
if ( empty( $value ) ) continue;
foreach ( $value as $ke => $va ) {
//print_r($ke.PHP_EOL);
// Attribute maps are left untouched.
if ( $ke == '@attributes' ) continue;
foreach ( $va as $k => $v) {
//print_r($k.PHP_EOL);
// Filter 1: drop elements with a non-integer key, unless the entry name is
// one of the exceptions listed above.
if (
! is_int( $k )
&& ! in_array(
$ke,
$mdb_gremien_exep
)
) {
unset( $va[ $k ] );
}
// Filter 2: drop every element whose key compares greater than 0.
if ( $k > 0 ) {
unset( $va[ $k ] );
}
//print_r($k.PHP_EOL);
}
//print_r($va);
// Write the pruned copy back into the structure.
$bt_xml_array[ 'btpXMLData' ][ 'mdb' ][ 'mdbInfo' ][ 'mdbMitgliedschaften' ][ $key ][ $ke ] = $va;
}
}
}
// Scratch section: derive the key structure of the collected data sets.
// Load the JSON dumps from tmp/ as associative arrays.
$tmp = json_decode(file_get_contents('tmp/bt_mdb_db.json'), true);
//print_r(count($tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ]).PHP_EOL);
//print_r($tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ 629 ][ 'btpXMLData' ][ 'mdb' ][ 'mdbInfo' ][ 'mdbMitgliedschaften' ][ 'mdbOrdentlichesMitgliedGremien' ]);die;
$cdu_tmp = json_decode(file_get_contents('tmp/cdu_mdb_db.json'), true);
$spd_tmp = json_decode(file_get_contents('tmp/spd_mdb_db.json'), true);
$linke_tmp = json_decode(file_get_contents('tmp/linke_mdb_db.json'), true);
$gruene_tmp = json_decode(file_get_contents('tmp/gruene_mdb_db.json'), true);
// The member entries of the Bundestag dump.
$mdbs_array_tmp = $tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ];
//print_r( $mdbs_array_tmp );die;
//array_walk_recursive( $mdbs_array_tmp, 'af_cb_empty_all_array_values' );
//print_r( $mdbs_array_tmp );die;
//print_r(array_keys($mdbs_array_tmp));die;
// $cdu_mdb_tmp = $cdu_tmp[0];
// $spd_mdb_tmp = $spd_tmp[0];
// $linke_mdb_tmp = $linke_tmp[0];
// $gruene_mdb_tmp = $gruene_tmp[0];
// array_walk_recursive( $cdu_mdb_tmp, 'af_cb_empty_all_array_values' );
// array_walk_recursive( $spd_mdb_tmp, 'af_cb_empty_all_array_values' );
// array_walk_recursive( $linke_mdb_tmp, 'af_cb_empty_all_array_values' );
// array_walk_recursive( $gruene_mdb_tmp, 'af_cb_empty_all_array_values' );
// file_put_contents( 'tmp/test/cdu_arr_struc.php', var_export( $cdu_mdb_tmp, true ) );
// file_put_contents( 'tmp/test/spd_arr_struc.php', var_export( $cdu_mdb_tmp, true ) );
// file_put_contents( 'tmp/test/linke_arr_struc.php', var_export( $cdu_mdb_tmp, true ) );
// file_put_contents( 'tmp/test/gruene_arr_struc.php', var_export( $cdu_mdb_tmp, true ) );
// //print_r( $cdu_mdb_tmp );
// die;
//$alle_parteien_url_tmp = $cdu_tmp + $spd_tmp + $linke_tmp + $gruene_tmp;
// All party data sets appended into one array.
$alle_parteien_url_tmp = array_merge( $cdu_tmp, $spd_tmp, $linke_tmp, $gruene_tmp );
//print_r(count($alle_parteien_url_tmp).PHP_EOL);
//print_r($alle_parteien_url_tmp);
// PF URL
// Blank every value of the SPD data set, fold all entries into one array
// holding the union of their keys, print it and stop.
$array_to_analyse = $spd_tmp;
array_walk_recursive( $array_to_analyse, 'af_cb_empty_all_array_values' );
$new_distinct_array = array_of_arrays_merge_recusrsive_distinct( $array_to_analyse );
//print_r( $new_distinct_array ); die;
//file_put_contents( 'tmp/test/url_misc_arr_struc.php', var_export( $new_distinct_array, true ) );
print_r($new_distinct_array);die;
// BT XML
// Same procedure for the Bundestag member entries, followed by the membership
// correction. Not reached while the die above is in place.
$array_to_analyse = $mdbs_array_tmp;
array_walk_recursive( $array_to_analyse, 'af_cb_empty_all_array_values' );
$new_distinct_array = array_of_arrays_merge_recusrsive_distinct( $array_to_analyse );
//print_r( $new_distinct_array ); die;
bt_xml_distinct_array_mitgliedschaften_korrektor( $new_distinct_array );
//print_r($new_distinct_array[ 'btpXMLData' ][ 'mdb' ][ 'mdbInfo' ][ 'mdbMitgliedschaften' ]);die;
//file_put_contents( 'tmp/test/xml_arr_struc.php', var_export( $new_distinct_array, true ) );
print_r($new_distinct_array);die;
die;
/**
 * Finds the top-level key under which a value occurs, at any nesting depth.
 *
 * The search is destructive: every top-level element inspected before the
 * hit is removed from $haystack, and when nothing is found $haystack ends up
 * empty. Nested arrays are searched on copies and stay intact.
 *
 * @param mixed $needle   Value to find (compared with ===).
 * @param array $haystack Array to search; pruned in place as described.
 * @return int|string|false Top-level key of the first hit, or false.
 */
function recursive_array_search($needle,&$haystack) {
    foreach ($haystack as $index => $item) {
        $hit = ($needle === $item);
        if (!$hit && is_array($item)) {
            $hit = (recursive_array_search($needle, $item) !== false);
        }
        if ($hit) {
            return $index;
        }
        unset($haystack[$index]);
    }
    return false;
}
/**
 * Finds the top-level key of the first record that contains $needle_one and
 * that holds $needle_two under its 'Name' element.
 *
 * Candidates are located with recursive_array_search(), which prunes the
 * working copy as it goes. The caller's array is not modified, because
 * $haystack is passed by value.
 *
 * Fixes over the previous version: a hit at key 0 is no longer mistaken for
 * "not found" (strict comparison with false), and a rejected candidate is
 * always removed, so the loop is guaranteed to terminate.
 *
 * @param mixed $needle_one First value to look for.
 * @param mixed $needle_two Second value; must be found under the record's 'Name' key.
 * @param array $haystack   List of records to search.
 * @return int|string|false Key of the matching record, or false.
 */
function recursive_array_search_two( $needle_one, $needle_two, $haystack ) {
    while ( count( $haystack ) > 0 ) {
        $needle_one_res = recursive_array_search( $needle_one, $haystack );
        if ( $needle_one_res === false ) {
            return false;
        }

        // Only an array record can hold the second needle under a 'Name' key.
        $needle_two_res = false;
        if ( is_array( $haystack[ $needle_one_res ] ) ) {
            $needle_two_res = recursive_array_search( $needle_two, $haystack[ $needle_one_res ] );
        }

        if ( $needle_two_res === 'Name' ) {
            return $needle_one_res;
        }

        // Discard the rejected candidate before looking for the next one.
        unset( $haystack[ $needle_one_res ] );
        $haystack = array_filter( $haystack );
    }

    return false;
}
/**
 * Copies the 'mdbZuname' and 'mdbVorname' values of one member entry into the
 * two output variables.
 *
 * @param array      $array Decoded data with the entries under ['mdbUebersicht']['mdbs']['mdb'].
 * @param int|string $index Key of the member entry.
 * @param mixed      $nns   Receives the 'mdbZuname' value.
 * @param mixed      $vns   Receives the 'mdbVorname' value.
 */
function set_nns_vns_var_for_search( $array, $index, &$nns, &$vns ) {
    $info = $array[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ $index ]['btpXMLData']['mdb']['mdbInfo'];
    $nns = $info['mdbZuname'];
    $vns = $info['mdbVorname'];
}
// Scratch section: match Bundestag member entries against the party data.
// Read the two name values of entry 34 and look for a party record that
// contains both.
$nns = '';
$vns = '';
set_nns_vns_var_for_search( $tmp, 34, $nns, $vns );
print_r( $nns . PHP_EOL );
print_r( $vns . PHP_EOL );
$mfp = recursive_array_search_two( $nns, $vns, $alle_parteien_url_tmp );
print_r( $mfp . PHP_EOL );
//print_r( $alle_parteien_url_tmp[ $mfp ] );
//print_r( $tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ 34 ]);
//die;
// Attach the matching record from $linke_tmp to a member entry as 'pfpURLData'.
foreach ( $tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ] as $k => $v ) {
// Stops the script at the first key greater than 0, so only the entry with
// key 0 is processed.
if ($k > 0 ) die;
// print_r($v);
// print_r($k.PHP_EOL);
// print_r($v['mdbName']['@value'].PHP_EOL);
$name = $v['btpXMLData']['mdb']['mdbInfo']['mdbAdelstitel'] . ' ' . $v['btpXMLData']['mdb']['mdbInfo']['mdbZuname'];
// print_r($v['btpXMLData']['mdb']['mdbInfo']['mdbAdelstitel'] . ' ' . $v['btpXMLData']['mdb']['mdbInfo']['mdbZuname'].PHP_EOL);
// print_r($v['btpXMLData']['mdb']['mdbInfo']['mdbVorname'].PHP_EOL);
// print_r($v['btpXMLData']['mdb']['mdbInfo']['mdbPartei'].PHP_EOL);
// print_r( $linke_tmp[ recursive_array_search( $name, $linke_tmp ) ] );
// recursive_array_search() removes the elements it has inspected from
// $linke_tmp. NOTE(review): a failed search returns false, which is then used
// as the array key here - confirm this is intended.
$pfp = $linke_tmp[ recursive_array_search( $name, $linke_tmp ) ];
$tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ $k ][ 'pfpURLData' ] = $pfp;
print_r($tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ $k ]);
// if (
// ! empty(
// $tmp[ 'mdbUebersicht' ]
// [ 'mdbs' ][ 'mdb' ]
// [ $k ][ 'btpXMLData' ][ 'mdb' ]
// [ 'mdbInfo' ][ 'mdbMitgliedschaften' ]
// [ 'mdbStellvVorsitzSonstigeGremien' ]
// [ 'mdbStellvVorsitzSonstigesGremium' ]
// )
// ) {
// print_r(
// $tmp[ 'mdbUebersicht' ]
// [ 'mdbs' ][ 'mdb' ]
// [ $k ][ 'btpXMLData' ][ 'mdb' ]
// [ 'mdbInfo' ][ 'mdbMitgliedschaften' ]
// [ 'mdbStellvVorsitzSonstigeGremien' ]
// [ 'mdbStellvVorsitzSonstigesGremium' ]
// );
// }
}
// $names = array_column( $tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ], 'mdbName' );
// $names = array_map(
// function ($value) {
// return $value['btpXMLData']['mdb']['mdbInfo']['mdbZuname'] . ' ' . $value['btpXMLData']['mdb']['mdbInfo']['mdbVorname'];
// },
// $tmp[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ]
// );
// print_r(array_unique($names));
die;
parser-bundestag.php
<?php
// Output file (without extension) for the combined member database.
$db_file_arr = 'tmp/bt_mdb_db';
$bt_domdoc = $data[ 'current' ][ 'domdoc' ];
// Source URL of the XML member index and the local cache locations derived from it.
$xml_url = 'http://www.bundestag.de/xml/mdb/index.xml';
$xml_file_bn = basename( $xml_url );
$xml_file_bn_wo_ext = basename( $xml_file_bn, '.xml' );
$xml_path = 'tmp/bundestag/xml/';
$arr_path = 'tmp/bundestag/array/';
$xml_path_abg = $xml_path . 'abg/';
$arr_path_abg = $arr_path . 'abg/';
$xml_file = $xml_path . $xml_file_bn;
$arr_file = $arr_path . $xml_file_bn_wo_ext;
// Download the index only once. Stop on a failed download instead of caching
// an empty file that every later run would pick up.
if ( ! file_exists( $xml_file ) ) {
    $xml_src_url = file_get_contents( $xml_url );
    if ( $xml_src_url === false ) {
        die( 'download failed: ' . $xml_url );
    }
    file_put_contents(
        $xml_file,
        $xml_src_url
    );
}
$xml_src = file_get_contents( $xml_file );
$xml_src_arr = XML2Array::createArray( $xml_src );
// Keep a var_export() dump of the parsed index next to the XML cache.
if ( ! file_exists( $arr_file ) ) {
    file_put_contents(
        $arr_file,
        var_export( $xml_src_arr, true )
    );
}
//print_r( $txarray[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ] );
// Walk the member index: drop every entry whose status is not 'Aktiv' and
// attach the parsed detail XML of each remaining member to its entry.
foreach ( $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ] as $key => $abg ) {
    //print_r($key . PHP_EOL );
    //print_r( $abg[ 'mdbInfoXMLURL' ] . PHP_EOL );
    $status = $abg[ 'mdbID' ][ '@attributes' ][ 'status' ];
    if ( $status != 'Aktiv' ) {
        //print_r( $status . PHP_EOL );
        //print_r( $key . PHP_EOL );
        unset( $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ $key ] );
        continue;
    }
    // Cache locations of this member's detail XML and of its array dump.
    $abg_xml_url = $abg[ 'mdbInfoXMLURL'];
    $abg_file_bn = basename( $abg_xml_url );
    $abg_file_bn_wo_ext = basename( $abg_file_bn, '.xml' );
    $abg_xml_file = $xml_path_abg . $abg_file_bn;
    $abg_arr_file = $arr_path_abg . $abg_file_bn_wo_ext;
    // Download once. Stop on a failed download instead of caching an empty
    // file, which would make the XML parser below fail on every later run.
    if ( ! file_exists( $abg_xml_file ) ) {
        $abg_xml_download = file_get_contents( $abg_xml_url );
        if ( $abg_xml_download === false ) {
            die( 'download failed: ' . $abg_xml_url );
        }
        file_put_contents(
            $abg_xml_file,
            $abg_xml_download
        );
    }
    $abg_xml_src = file_get_contents( $abg_xml_file );
    $abg_arr_src = XML2Array::createArray($abg_xml_src);
    if ( ! file_exists( $abg_arr_file ) ) {
        file_put_contents(
            $abg_arr_file,
            var_export( $abg_arr_src, true )
        );
    }
    $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ $key ][ 'btpXMLData' ] = $abg_arr_src;
    // Disabled draft (kept for reference): reads the "Stufe" levels from the
    // "voa_tab1" boxes of a profile page and sums up minimum amounts.
    /***
    $voa_box = $profil_xpath->query( '//*[contains( @class, "voa_tab1" )]' );
    //print_r($voa_box);
    $voa_arr = array();
    foreach ( $voa_box as $vb ) {
    //print_r($vb);
    //print_r(preg_match('/Stufe ([0-9]+)/i', $vb->nodeValue, $voa_st ));
    //print_r($hit);
    preg_match('/Stufe ([0-9]+)/i', $vb->nodeValue, $voa_st );
    if ( ! empty( $voa_st ) ) {
    $eml = preg_match('/([0-9]+), Stufe/i', $vb->nodeValue);
    $mtl = preg_match('/(monatlich), Stufe/i', $vb->nodeValue);
    $jhr = preg_match('/(jährlich), Stufe/i', $vb->nodeValue);
    print_r($vb->nodeValue.PHP_EOL);print_r('e:'.$eml);print_r('m:'.$mtl);print_r('j:'.$jhr.PHP_EOL);
    if ( $eml || $mtl || $jhr ) {
    if ( $eml ) {
    $voa_arr['stufen'][ 'eml' ][] = $voa_st[1];
    } elseif ( $mtl ) {
    $voa_arr['stufen'][ 'mtl' ][] = $voa_st[1];
    } elseif ( $jhr ) {
    $voa_arr['stufen'][ 'jhr' ][] = $voa_st[1];
    }
    } else {
    echo 'nich möglich';
    }
    }
    }
    $voa_stufen = array(
    'min' => array(
    '1' => '1000',
    '2' => '3501',
    '3' => '7001',
    '4' => '15001',
    '5' => '30001',
    '6' => '50001',
    '7' => '75001',
    '8' => '100001',
    '9' => '150001',
    '10' => '250001',
    ),
    'max' => array(
    )
    );
    if ( ! empty( $voa_arr ) ) {
    $voa_min = '0';
    if ( isset( $voa_arr[ 'stufen' ][ 'eml' ] ) ) {
    foreach ( $voa_arr[ 'stufen' ][ 'eml' ] as $emlst ) {
    $voa_min = $voa_min + $voa_stufen[ 'min' ][ $emlst ];
    }
    }
    if ( isset( $voa_arr[ 'stufen' ][ 'mtl' ] ) ) {
    foreach ( $voa_arr[ 'stufen' ][ 'mtl' ] as $mtlst ) {
    $voa_min = $voa_min + ( 12 * $voa_stufen[ 'min' ][ $mtlst ]);
    }
    }
    if ( isset( $voa_arr[ 'stufen' ][ 'jhr' ] ) ) {
    foreach ( $voa_arr[ 'stufen' ][ 'jhr' ] as $jhrst ) {
    $voa_min = $voa_min + $voa_stufen[ 'min' ][ $jhrst ];
    }
    }
    $voa_arr[ 'min' ] = $voa_min;
    }
    /***/
    // Placeholder for data scraped from the profile page; 'VOA' stays empty
    // while the draft above is disabled.
    $bt_mdb_url_data = array();
    $bt_mdb_url_data[] = array(
        'VOA' => '',
    );
    $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ][ $key ][ 'btpURLData' ] = $bt_mdb_url_data;
}
//print_r( count($xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ]) . PHP_EOL );
// array_splice() with offset 0 and length 0 removes nothing, but it renumbers
// the integer keys, which closes the gaps left by the removed entries.
array_splice( $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ], 0 , 0 );
//print_r( count($xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ]) . PHP_EOL );
//krsort( $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ] );
//print_r( key( $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ] ) ); die;
// print_r( key( $xml_src_arr[ 'mdbUebersicht' ][ 'mdbs' ][ 'mdb' ] ) ); die;
// Write the database twice: as a var_export() dump and as JSON.
file_put_contents( $db_file_arr, var_export( $xml_src_arr, true ) );
// The second parameter of json_encode() is a flags bitmask, not the "assoc"
// switch of json_decode(); passing true silently selected JSON_HEX_TAG.
$bt_mdb_json = json_encode( $xml_src_arr );
if ( $bt_mdb_json === false ) {
    // Do not overwrite the JSON database with an empty file.
    die( 'json encoding failed: ' . json_last_error_msg() );
}
file_put_contents( $db_file_arr.'.json', $bt_mdb_json );
parser-cdu.php
<?php
// $profil_url_daten_obj = new ProfilURLDaten();
// //print_r( $profil_url_daten_obj );
// $profil_url_daten_obj->pud_dump();
// die;
$mdb_db = array();
$mdb_db_tmp = array();
$db_file_arr = 'tmp/cdu_mdb_db';
$baseurl = $data[ 'current' ][ 'baseurl' ];
$mdb_lst_pg_dd = $data[ 'current' ][ 'domdoc' ];
$mdb_lst_xp = new DOMXpath( $mdb_lst_pg_dd );
$mdb_lst_divs = $mdb_lst_xp->query(
'//div[contains( @class, "abgeordnete_az_content" )]
/div[contains( @class, "node-abgeordneter" )]'
);
//print_r($mdb_lst_divs);
foreach ( $mdb_lst_divs as $mdb_lst_div ) {
//print_r($mdb_lst_div);
$profil_url_daten_obj = new ProfilURLDaten();
$mdb_data_arr = $profil_url_daten_obj->profil_url_daten;
$name_from_list = $mdb_lst_xp->query(
'.//div[contains(@class, "group-right")]/h2/a',
$mdb_lst_div
);
//print_r( $name_from_list );
foreach ( $name_from_list as $nfl ) {
//print_r( $nfl );
$name_arr = array();
$nfl_val = $nfl->nodeValue;
//print_r($nfl_val . PHP_EOL );
$mdb_pl_href = $nfl->getAttribute( 'href' );
//print_r( $mdb_pl_href . PHP_EOL );
// Clean Up Whitespaces
$nfl_str = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $nfl_val );
//print_r($nfl_str . PHP_EOL );
$nfl_str = preg_replace( '/([A-Z])(\.)/', '$1', $nfl_str );
//print_r($nfl_str . PHP_EOL );
// Separate Title from Name
$nfl_ti_fn_nn_arr = explode( '.', $nfl_str );
$nfl_fn_nn_str = trim( array_pop( $nfl_ti_fn_nn_arr ) );
// Create Array with NN FN
$nfl_fn_nn_arr = explode( ' ', $nfl_fn_nn_str );
//print_r($nfl_fn_nn_arr);
if ( count($nfl_fn_nn_arr) > 2 ) {
//print_r($nfl_fn_nn_arr);
if ( preg_match( '/\b(\sde|Graf\s)\b/', $nfl_str ) ) {
//print_r(preg_match( '/\b(\sde|Graf\s)\b/', $nfl_str ));
$mdb_nn = trim( $nfl_fn_nn_arr[1] . ' ' . $nfl_fn_nn_arr[2] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
} elseif ( preg_match( '/\b(\svon\s)\b/', $nfl_str ) ) {
//print_r(preg_match( '/\b(\svon\s)\b/', $nfl_str ));
if ( preg_match( '/\b(\svon der\s|\sFreiherr von\s)\b/', $nfl_str ) ) {
$mdb_nn = trim( $nfl_fn_nn_arr[1] . ' ' . $nfl_fn_nn_arr[2] . ' ' . $nfl_fn_nn_arr[3] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
} elseif ( preg_match( '/\b(\svon\s)\b/', $nfl_str ) ) {
$mdb_nn = trim( $nfl_fn_nn_arr[1] . ' ' . $nfl_fn_nn_arr[2] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
}
} else {
$mdb_nn = trim( $nfl_fn_nn_arr[2] );
if ( strlen( $nfl_fn_nn_arr[1] ) == 1 ) {
$mdb_vn = trim( $nfl_fn_nn_arr[0] . ' ' . $nfl_fn_nn_arr[1] . '.' );
} else {
$mdb_vn = trim( $nfl_fn_nn_arr[0] . ' ' . $nfl_fn_nn_arr[1] );
}
}
} else {
$mdb_nn = trim( $nfl_fn_nn_arr[1] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
}
//print_r( $mdb_vn . ' ' . $mdb_nn . PHP_EOL );
$nfl_ti_arr = $nfl_ti_fn_nn_arr;
$mdb_ti = '';
if ( ! empty( $nfl_ti_arr ) ) {
foreach ( $nfl_ti_arr as $title_ele ) {
$mdb_ti .= $title_ele . '.';
}
}
$name_arr = array(
'Vorname' => $mdb_vn,
'Nachname' => $mdb_nn,
'Titel' => $mdb_ti,
);
//print_r($name_arr);
$mdb_data_arr[ 'name' ][ 'nachname' ] = $mdb_nn;
$mdb_data_arr[ 'name' ][ 'vorname' ] = $mdb_vn;
$mdb_data_arr[ 'name' ][ 'adelszusatz' ] = '';
$mdb_data_arr[ 'name' ][ 'titel' ] = $mdb_ti;
}
$mdb_pl_url = $baseurl . $mdb_pl_href;
//print_r( $mdb_pl_url . PHP_EOL );
$src_file_name = basename( $mdb_pl_url );
$src_path = 'tmp/cdu/src/abg/';
$src_file = $src_path . $src_file_name;
if ( ! file_exists( $src_file ) ) {
$src_url = file_get_contents( $mdb_pl_url );
file_put_contents(
$src_file,
$src_url
);
}
$mdb_pl_src = file_get_contents( $src_file );
//$mdb_pl_src = file_get_contents( $mdb_pl_url );
$mdb_pl_dd = initialize_domdoc( $mdb_pl_src );
$mdb_pl_xp = new DOMXpath( $mdb_pl_dd );
$mdb_cntc_adr_arr = array();
$mdb_pl_ba = $mdb_pl_xp->query(
"//div[contains( @class, 'group-adressinfo' )]
//div[contains( @class, 'field-name-field-kontakt-berlin' )]
//div[contains( @class, 'adr' )]"
);
//print_r( $mdb_pl_ba );
foreach ( $mdb_pl_ba as $mp_ba ) {
//print_r( $mp_ba );
//$mp_ba_val_str = $mp_ba->nodeValue;
//print_r( $mp_ba_val_str . PHP_EOL );
$bb_adr_data = array(
'Bezeichnung' => 'Berliner Büro',
'Adresse' => array(
'frag' => array(
'Straße' => 'Platz der Republik',
'Hausnummer' => '1',
'Postleitzahl' => '11011',
'Ort' => 'Berlin'
),
)
);
$bb_adr_data[ 'Adresse' ][ 'full' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Straße' ]
. ' '
. $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Hausnummer' ]
. PHP_EOL
. $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Postleitzahl' ]
. ' '
. $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Ort' ];
$kontakt_arr = $profil_url_daten_obj->kontakt;
/***
$mpax_sa_div = $mdb_pl_xp->query(
'./div[contains( @class, "street-address" )]',
$mp_ba
);
//print_r( $mpax_sa_div );
foreach ( $mpax_sa_div as $mp_sa_div ) {
//print_r( $mp_sa_div );
$mp_sa_div_val = $mp_sa_div->nodeValue;
print_r( $mp_sa_div_val );
}
$mpax_pc_div = $mdb_pl_xp->query(
'./span[contains( @class, "postal-code" )]',
$mp_ba
);
//print_r( $mpax_pc_div );
foreach ( $mpax_pc_div as $mp_pc_div ) {
//print_r( $mp_pc_div );
$mp_pc_div_val = $mp_pc_div->nodeValue;
print_r( $mp_pc_div_val );
}
$mpax_ly_div = $mdb_pl_xp->query(
'./span[contains( @class, "locality" )]',
$mp_ba
);
//print_r( $mpax_ly_div );
foreach ( $mpax_ly_div as $mp_ly_div ) {
//print_r( $mp_ly_div );
$mp_ly_div_val = $mp_ly_div->nodeValue;
print_r( $mp_ly_div_val );
}
/***/
$kontakt_arr = $profil_url_daten_obj->kontakt;
/***/
$mpax_em_div = $mdb_pl_xp->query(
'./div[contains( @class, "email" )]/span/a',
$mp_ba
);
//print_r( $mpax_em_div );
foreach ( $mpax_em_div as $mp_em_div ) {
//print_r( $mp_em_div );
$mp_em_div_val = preg_replace( '/\n|\r|\s+/m', '', $mp_em_div->nodeValue );
//print_r( $mp_em_div_val );
$bb_mail = $mp_em_div_val;
}
$mpax_tl_div = $mdb_pl_xp->query(
'./div[contains( @class, "tel" )]/span',
$mp_ba
);
//print_r( $mpax_tl_div );
foreach ( $mpax_tl_div as $mp_tl_div ) {
//print_r( $mp_ly_div );
$mp_tl_div_val = preg_replace( '/\n|\r|\s+/m', '', $mp_tl_div->nodeValue );
//print_r( $mp_tl_div_val );
$bb_tel = preg_replace(
'/[\/\-]/',
' ',
$mp_tl_div_val
);
//print_r( $bb_tel . PHP_EOL );
}
/***/
$bb_adr_data[ 'Kontakt' ][ 'Mail' ] = $bb_mail;
$bb_adr_data[ 'Kontakt' ][ 'Telefon' ] = $bb_tel;
$mdb_cntc_adr_arr[] = $bb_adr_data;
$kontakt_arr[ 'bezeichnung' ] = 'Berliner Büro';
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ] = $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Straße' ];
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ] = $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Hausnummer' ];
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ] = $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Postleitzahl' ];
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ] = $bb_adr_data[ 'Adresse' ][ 'frag' ][ 'Ort' ];
$kontakt_arr[ 'adresse' ][ 'komplett' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ]
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ];
$kontakt_arr[ 'eda' ][ 'mail' ] = $bb_mail;
$kontakt_arr[ 'eda' ][ 'telefon' ] = $bb_tel;
$kontakt_arr[ 'eda' ][ 'fax' ] = '';
$mdb_data_arr[ 'kontakte' ][] = $kontakt_arr;
}
/***/
$mdb_pl_wa = $mdb_pl_xp->query(
"//div[contains( @class, 'group-adressinfo' )]
//div[contains( @class, 'group-wahl-wrapper' )]
//div[contains( @class, 'adr' )]"
);
//print_r( $mdb_pl_wa );
//print_r( $mdb_pl_wa->length . PHP_EOL );
foreach ( $mdb_pl_wa as $mp_wa ) {
//print_r( $mp_wa );
$kontakt_arr = $profil_url_daten_obj->kontakt;
$wa_str = '';
$wa_hnr = '';
$wa_plz = '';
$wa_ort = '';
$wa_mail = '';
$wa_tel = '';
$wa_adr_data = array();
$wa_adr_data[ 'Bezeichnung' ] = 'Wahlkreisbüro';
$mp_wa_val_str = preg_replace( '/\n|\r|\s+/m', '', $mp_wa->nodeValue );
//print_r( $mp_wa_val_str . PHP_EOL );
if ( empty( $mp_wa_val_str ) ) {
//print_r( 'leer' . PHP_EOL );
continue;
}
//print_r( $mp_wa );
$mpax_sa_div = $mdb_pl_xp->query(
'./div[contains( @class, "street-address" )]',
$mp_wa
);
//print_r( $mpax_sa_div );
foreach ( $mpax_sa_div as $mp_sa_div ) {
//print_r( $mp_sa_div );
$mp_sa_div_val = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $mp_sa_div->nodeValue );
//print_r( $mp_sa_div_val );
$str_hnr_arr = preg_split( '/(?=\d)/', $mp_sa_div_val, 2 );
array_trim( $str_hnr_arr );
//print_r( $str_hnr_arr );
$wa_str = $str_hnr_arr[ 0 ];
$wa_hnr = $str_hnr_arr[ 1 ];
}
$mpax_pc_div = $mdb_pl_xp->query(
'./span[contains( @class, "postal-code" )]',
$mp_wa
);
//print_r( $mpax_pc_div );
foreach ( $mpax_pc_div as $mp_pc_div ) {
//print_r( $mp_pc_div );
$mp_pc_div_val = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $mp_pc_div->nodeValue );
//print_r( $mp_pc_div_val );
$wa_plz = $mp_pc_div_val;
}
$mpax_ly_div = $mdb_pl_xp->query(
'./span[contains( @class, "locality" )]',
$mp_wa
);
//print_r( $mpax_ly_div );
foreach ( $mpax_ly_div as $mp_ly_div ) {
//print_r( $mp_ly_div );
$mp_ly_div_val = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $mp_ly_div->nodeValue );
//print_r( $mp_ly_div_val );
$wa_ort = $mp_ly_div_val;
}
$mpax_em_div = $mdb_pl_xp->query(
'./div[contains( @class, "email" )]/span/a',
$mp_wa
);
//print_r( $mpax_em_div );
foreach ( $mpax_em_div as $mp_em_div ) {
//print_r( $mp_em_div );
$mp_em_div_val = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $mp_em_div->nodeValue );
//print_r( $mp_em_div_val );
$wa_mail = $mp_em_div_val;
}
$mpax_tl_div = $mdb_pl_xp->query(
'./div[contains( @class, "tel" )]/span',
$mp_wa
);
//print_r( $mpax_tl_div );
foreach ( $mpax_tl_div as $mp_tl_div ) {
//print_r( $mp_ly_div );
$mp_tl_div_val = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $mp_tl_div->nodeValue );
//print_r( $mp_tl_div_val );
$wa_tel = preg_replace(
'/[\/\-]/',
' ',
$mp_tl_div_val
);
}
$wa_adr_data[ 'Adresse' ][ 'frag' ] = array (
'Straße' => $wa_str,
'Hausnummer' => $wa_hnr,
'Postleitzahl' => $wa_plz,
'Ort' => $wa_ort,
);
$wa_adr_data[ 'Adresse' ][ 'full' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $wa_str
. ' '
. $wa_hnr
. PHP_EOL
. $wa_plz
. ' '
. $wa_ort;
$wa_adr_data[ 'Kontakt' ][ 'Mail' ] = $wa_mail;
$wa_adr_data[ 'Kontakt' ][ 'Telefon' ] = $wa_tel;
$mdb_cntc_adr_arr[] = $wa_adr_data;
$kontakt_arr[ 'bezeichnung' ] = 'Wahlkreisbüro';
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ] = $wa_str;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ] = $wa_hnr;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ] = $wa_plz;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ] = $wa_ort;
$kontakt_arr[ 'adresse' ][ 'komplett' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ]
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ];
$kontakt_arr[ 'eda' ][ 'mail' ] = $wa_mail;
$kontakt_arr[ 'eda' ][ 'telefon' ] = $wa_tel;
$kontakt_arr[ 'eda' ][ 'fax' ] = '';
$mdb_data_arr[ 'kontakte' ][] = $kontakt_arr;
}
/***/
$wahl_arr = array();
$mdb_pl_wk = $mdb_pl_xp->query(
"//div[contains( @class, 'group-adressinfo' )]
//div[contains( @class, 'group-wahl-wrapper' )]
//div[contains( @class, 'wahlkreis-name' )]"
);
//print_r( $mdb_pl_wk );
//print_r( $mdb_pl_wk->length . PHP_EOL );
foreach ( $mdb_pl_wk as $mp_wk ) {
//print_r( $mp_wk );
//print_r( $mp_wk->nodeValue . PHP_EOL );
preg_match( '/(?:\()(\d*)(?:\))/', $mp_wk->nodeValue, $wk_nr_arr );
//print_r( $wk_nr_arr );
if ( ! empty( $wk_nr_arr[ 1 ] ) ) {
$wahl_arr[ 'unsp' ] = array(
'wknr' => $wk_nr_arr[ 1 ],
);
$mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'wahlkreisnummer' ] = $wk_nr_arr[ 1 ];
}
}
$linkliste_arr = array();
$mdb_pl_ll_lis = $mdb_pl_xp->query(
"//div[contains( @class, 'group-personal-links' )]//li" );
//print_r( $mdb_pl_ll_lis );
foreach ( $mdb_pl_ll_lis as $mp_ll_li ) {
//print_r( $mp_ll_li );
$mp_ll_li_val = trim( $mp_ll_li->nodeValue );
//print_r( $mp_ll_li_val . PHP_EOL );
$mp_ll_li_val = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $mp_ll_li_val );
// Resolve the target of the current list item.
// The href is reset for every item: without this an <li> that contains no
// anchor would silently reuse the href of the previous item (or hit an
// undefined variable on the very first item).
$mp_ll_li_href = '';
$mp_ll_li_link = $mdb_pl_xp->query(
    './/a',
    $mp_ll_li
);
foreach ( $mp_ll_li_link as $mlll ) {
    // When an item holds several anchors the last one wins.
    $mp_ll_li_href = $mlll->getAttribute( 'href' );
}
// Legacy link record: visible text plus target.
$link_arr = array(
    'txt_val' => $mp_ll_li_val,
    'href'    => $mp_ll_li_href,
);
if ( preg_match( '/^homepage|website|persönliche homepage/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Homepage' ] = $link_arr;
} elseif ( preg_match( '/youtube|you tube/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Youtube' ] = $link_arr;
} elseif ( preg_match( '/facebook/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Facebook' ] = $link_arr;
} elseif ( preg_match( '/twitter/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Twitter' ] = $link_arr;
} elseif ( preg_match( '/google+/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Google+' ] = $link_arr;
} elseif ( preg_match( '/flickr/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'flickr' ] = $link_arr;
} elseif ( preg_match( '/xing/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'xing' ] = $link_arr;
} elseif ( preg_match( '/meinvz/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'meinvz' ] = $link_arr;
} elseif ( preg_match( '/studivz|VZ\-Netz/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'studivz' ] = $link_arr;
} elseif ( preg_match( '/myspace/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'MySpace' ] = $link_arr;
} elseif ( preg_match( '/^friendfeed/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'friendfeed' ] = $link_arr;
} elseif ( preg_match( '/^abgeordnetenwatch/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'PolSpez' ][ 'abgeordnetenwatch' ] = $link_arr;
} elseif ( preg_match( '/^Videoarchiv der Reden/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Bundestag' ][ 'Reden' ] = $link_arr;
} elseif ( preg_match( '/^Deutscher Bundestag/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Bundestag' ][ 'Profil' ] = $link_arr;
} elseif ( preg_match( '/Landesgruppe/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Landesgruppe' ] = $link_arr;
} elseif ( preg_match( '/^Landesverband/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Landesverband' ] = $link_arr;
} elseif ( preg_match( '/Kreisverband/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Kreisverband' ] = $link_arr;
} elseif ( preg_match( '/Landkreis|\bKreis\b/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Landkreis' ] = $link_arr;
} elseif ( preg_match( '/\b\w*kreis\b/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Landkreis' ] = $link_arr;
} elseif ( preg_match( '/\bStadt\b|\bBergstadt\b/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Stadt' ] = $link_arr;
} elseif ( preg_match( '/\bGemeinde\b/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Gemeinde' ] = $link_arr;
} elseif ( preg_match( '/^CDU/iu', $mp_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'CDU-Link' ][ $mp_ll_li_val ] = $link_arr;
} else {
$linkliste_arr[ 'Sonstige' ][ 'Unsortiert' ][ $mp_ll_li_val ] = $link_arr;
}
$verw_arr = $profil_url_daten_obj->verweisbasis;
$verw_arr[ 'text' ] = $mp_ll_li_val;
$verw_arr[ 'href' ] = $mp_ll_li_href;
if ( preg_match( '/^homepage|website|persönliche homepage/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'persoenlicheseite' ] = $verw_arr;
} elseif ( preg_match( '/youtube|you tube/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Youtube' ] = $verw_arr;
} elseif ( preg_match( '/facebook/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Facebook' ] = $verw_arr;
} elseif ( preg_match( '/twitter/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Twitter' ] = $verw_arr;
} elseif ( preg_match( '/google+/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Google+' ] = $verw_arr;
} elseif ( preg_match( '/flickr/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'flickr' ] = $verw_arr;
} elseif ( preg_match( '/xing/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'xing' ] = $verw_arr;
} elseif ( preg_match( '/meinvz/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'meinvz' ] = $verw_arr;
} elseif ( preg_match( '/studivz|VZ\-Netz/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'studivz' ] = $verw_arr;
} elseif ( preg_match( '/myspace/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'MySpace' ] = $verw_arr;
} elseif ( preg_match( '/^friendfeed/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'friendfeed' ] = $verw_arr;
} elseif ( preg_match( '/^abgeordnetenwatch/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'spezifisch' ][ 'abgeordnetenwatch' ] = $verw_arr;
} elseif ( preg_match( '/^Videoarchiv der Reden/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'bundestag' ][ 'reden' ] = $verw_arr;
} elseif ( preg_match( '/^Deutscher Bundestag/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'bundestag' ][ 'profil' ] = $verw_arr;
} elseif ( preg_match( '/Landesgruppe/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Landesgruppe' ] = $verw_arr;
} elseif ( preg_match( '/^Landesverband/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Landesverband' ] = $verw_arr;
} elseif ( preg_match( '/Kreisverband/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Kreisverband' ] = $verw_arr;
} elseif ( preg_match( '/Landkreis|\bKreis\b/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Landkreis' ] = $verw_arr;
} elseif ( preg_match( '/\b\w*kreis\b/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Landkreis' ] = $verw_arr;
} elseif ( preg_match( '/\bStadt\b|\bBergstadt\b/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Stadt' ] = $verw_arr;
} elseif ( preg_match( '/\bGemeinde\b/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Gemeinde' ] = $verw_arr;
} elseif ( preg_match( '/^CDU/iu', $mp_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'CDU-Link' ][ $mp_ll_li_val ] = $verw_arr;
} else {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'unsortiert' ][ $mp_ll_li_val ] = $verw_arr;
}
}
$biogra_arr = array();
$mdb_pl_bg_els = $mdb_pl_xp->query(
"//div[contains( @class, 'group-infobereich' )]"
);
//print_r( $mdb_pl_bg_els );
//print_r($mdb_nn . PHP_EOL);
foreach ( $mdb_pl_bg_els as $mp_bg_el ) {
//print_r( $mp_bg_el );
$mp_bg_el_gebd = $mdb_pl_xp->query(
'.//time[contains( @class, "date-display-single" )]',
$mp_bg_el
);
foreach ( $mp_bg_el_gebd as $mp_gebd ) {
//print_r($mp_gebd);
$mp_gebd_val = $mp_gebd->nodeValue;
//print_r( $mp_gebd_val . PHP_EOL );
$biogra_arr[ 'Geburtsdatum' ] = $mp_gebd_val;
$mdb_data_arr[ 'biografie' ][ 'geburtsdatum' ] = $mp_gebd_val;
}
$mp_bg_el_gebo = $mdb_pl_xp->query(
'.//div[contains( @class, "group-birthday" )]//child::text()[last()]',
$mp_bg_el
);
//print_r($mp_bg_el_gebo);
$mp_gebo_val = '';
if ( $mp_bg_el_gebo->length > 0 ) {
$gebo_ind = $mp_bg_el_gebo->length - 1;
$mp_gebo_val = $mp_bg_el_gebo->item( $gebo_ind )->nodeValue;
$mdb_data_arr[ 'biografie' ][ 'geburtsort' ] = $mp_bg_el_gebo->item( $gebo_ind )->nodeValue;
}
//print_r( $mp_gebo_val . PHP_EOL );
$biogra_arr[ 'Geburtsort' ] = $mp_gebo_val;
$mp_bg_el_ber = $mdb_pl_xp->query(
'.//div[contains( @class, "field-name-beruf-gendered" )]',
$mp_bg_el
);
foreach ( $mp_bg_el_ber as $mp_ber ) {
//print_r($mp_ber);
$mp_ber_val = $mp_ber->nodeValue;
//print_r( $mp_ber_val . PHP_EOL );
$biogra_arr[ 'Beruf' ] = $mp_ber_val;
$mdb_data_arr[ 'biografie' ][ 'beruf' ] = $mp_ber_val;
}
$mp_bg_el_fkt = $mdb_pl_xp->query(
'.//h3',
$mp_bg_el
);
$biogra_arr[ 'Funktion' ] = '';
foreach ( $mp_bg_el_fkt as $mp_fkt ) {
//print_r($mp_fkt);
$mp_fkt_val = $mp_fkt->nodeValue;
//print_r( $mp_fkt_val . PHP_EOL );
$biogra_arr[ 'Funktion' ] = $mp_fkt_val;
$mdb_data_arr[ 'politik' ][ 'fraktion' ][ 'funktionen' ] = $mp_fkt_val;
}
}
$mdb_data_arr[ 'url' ] = $mdb_pl_url;
$mdb_db[] = $mdb_data_arr;
/***/
$mdb_db_tmp[] = array(
'Name' => $name_arr,
'Kontaktdaten' => $mdb_cntc_adr_arr,
// 'Fraktion' => $ffkt_fl_arr,
'Wahl' => $wahl_arr,
'Links' => $linkliste_arr,
'Bio' => $biogra_arr,
'PURL' => $mdb_pl_url,
);
/***/
}
// print_r($mdb_db);die;
// print_r($mdb_db_tmp);die;
// Persist the collected records twice: as a PHP array literal and as JSON.
file_put_contents( $db_file_arr, var_export( $mdb_db, true ) );
// json_encode() takes a flags bitmask as second argument, not a boolean. The
// former `true` was coerced to 1, i.e. JSON_HEX_TAG; the flag is now spelled
// out so the output stays the same and the intent is visible.
$mdb_db_json = json_encode( $mdb_db, JSON_HEX_TAG );
if ( false === $mdb_db_json ) {
    // Do not overwrite an existing export with an empty file on failure.
    echo PHP_EOL . 'json export failed: ' . json_last_error_msg() . PHP_EOL;
} else {
    file_put_contents( $db_file_arr . '.json', $mdb_db_json );
}
parser-gruene.php
<?php
// $profil_url_daten_obj = new ProfilURLDaten();
// //print_r( $profil_url_daten_obj );
// $profil_url_daten_obj->pud_dump();
// die;
// Result containers: $mdb_db holds the records that are written to disk,
// $mdb_db_tmp the legacy-layout records (only used for debugging).
$mdb_db      = array();
$mdb_db_tmp  = array();
$db_file_arr = 'tmp/gruene_mdb_db';

// List page data for the directory currently being processed.
$baseurl       = $data[ 'current' ][ 'baseurl' ];
$mdb_lst_pg_dd = $data[ 'current' ][ 'domdoc' ];

// The deputies are listed inside one container element. getElementById()
// returns null when it is missing (e.g. after a markup change); stop with a
// clear message instead of handing null to importNode().
$mdb_lst_dn = $mdb_lst_pg_dd->getElementById( 'abgeordnete_slides_container' );
if ( null === $mdb_lst_dn ) {
    die( 'gruene: list container "abgeordnete_slides_container" not found' );
}

// Copy the container into a document of its own so that the absolute XPath
// below only sees the list entries.
$mdb_lst_dd = new DomDocument;
$mdb_lst_dd->appendChild(
    $mdb_lst_dd->importNode(
        $mdb_lst_dn,
        true
    )
);
$mdb_lst_xp   = new DOMXpath( $mdb_lst_dd );
$mdb_lst_divs = $mdb_lst_xp->query( '//div[contains( @class, "tt_content_list_item" )]' );
foreach ( $mdb_lst_divs as $mdb_lst_div ) {
// Fresh record for this list entry, seeded from the ProfilURLDaten template.
$profil_url_daten_obj = new ProfilURLDaten();
$mdb_data_arr = $profil_url_daten_obj->profil_url_daten;

// Reset everything the name loop is expected to fill. Without this a list
// entry that has no name link would inherit the previous deputy's name and
// profile URL.
$name_arr    = array();
$mdb_pl_href = null;
$mdb_vn      = '';
$mdb_nn      = '';
$mdb_ti      = '';

$name_from_list = $mdb_lst_xp->query(
    './/div[contains(@class, "abgeordnete_text")]/p/a',
    $mdb_lst_div
);
foreach ( $name_from_list as $nfl ) {
    $mdb_pl_href = '/' . $nfl->getAttribute( 'href' );

    // Clean up whitespace.
    $nfl_str = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $nfl->nodeValue );

    // Everything before the last "." is treated as title, the rest as name.
    $nfl_ti_fn_nn_arr = explode( '.', $nfl_str );
    $nfl_fn_nn_str    = trim( array_pop( $nfl_ti_fn_nn_arr ) );

    // First token = first name, second token = surname.
    // NOTE(review): further tokens are dropped, so a name such as
    // "Konstantin von Notz" would yield the surname "von" - confirm whether
    // multi-word surnames need handling here.
    $nfl_fn_nn_arr = explode( ' ', $nfl_fn_nn_str );
    $mdb_vn = trim( $nfl_fn_nn_arr[0] );
    $mdb_nn = isset( $nfl_fn_nn_arr[1] ) ? trim( $nfl_fn_nn_arr[1] ) : '';

    // Re-append the "." that explode() removed from every title fragment.
    $mdb_ti = '';
    foreach ( $nfl_ti_fn_nn_arr as $title_ele ) {
        $mdb_ti .= $title_ele . '.';
    }

    $name_arr = array(
        'Vorname'  => $mdb_vn,
        'Nachname' => $mdb_nn,
        'Titel'    => $mdb_ti,
    );
    $mdb_data_arr[ 'name' ][ 'nachname' ]    = $mdb_nn;
    $mdb_data_arr[ 'name' ][ 'vorname' ]     = $mdb_vn;
    $mdb_data_arr[ 'name' ][ 'adelszusatz' ] = '';
    $mdb_data_arr[ 'name' ][ 'titel' ]       = $mdb_ti;
}

// No name link means no profile URL: there is nothing to parse for this entry.
if ( null === $mdb_pl_href ) {
    echo PHP_EOL . 'gruene: list entry without profile link skipped' . PHP_EOL;
    continue;
}
// Parliamentary-group functions: taken from the last <p> of the text box, one
// function per line. Initialised per deputy so that an entry without such a
// paragraph does not reuse the previous deputy's list.
$ffkt_fl_arr    = array();
$ffkt_from_list = $mdb_lst_xp->query(
    './/div[contains(@class, "abgeordnete_text")]/p[last()]',
    $mdb_lst_div
);
foreach ( $ffkt_from_list as $ffl ) {
    $ffkt_arr    = explode( "\n", $ffl->nodeValue );
    $ffkt_fl_arr = array( 'Funktionen' => $ffkt_arr );
    $mdb_data_arr[ 'politik' ][ 'fraktion' ][ 'funktionen' ] = $ffkt_arr;
}

// Office mail address from the list entry's mailto: link. Defaults to an empty
// string so that a missing link cannot leak the previous deputy's address.
$buero_mail     = '';
$mail_from_list = $mdb_lst_xp->query(
    './/div[contains(@class, "email_link")]/a',
    $mdb_lst_div
);
foreach ( $mail_from_list as $mfl ) {
    $buero_mail = mailto_remover( $mfl->getAttribute( 'href' ) );
}
// Load the deputy's profile page, using a local copy when one exists.
$mdb_pl_url    = $baseurl . $mdb_pl_href;
$src_file_name = basename( $mdb_pl_url, '.html' );
$src_path      = 'tmp/gruene/src/abg/';
$src_file      = $src_path . $src_file_name;

if ( file_exists( $src_file ) ) {
    $mdb_pl_src = file_get_contents( $src_file );
} else {
    $mdb_pl_src = file_get_contents( $mdb_pl_url );
    // Only cache real content: a failed download (false) used to be written
    // as an empty file and was then reused on every later run.
    if ( false !== $mdb_pl_src && '' !== $mdb_pl_src ) {
        file_put_contents(
            $src_file,
            $mdb_pl_src
        );
    }
}

// Without page source there is nothing to parse for this deputy.
if ( false === $mdb_pl_src || '' === $mdb_pl_src ) {
    echo PHP_EOL . 'gruene: no page source for ' . $mdb_pl_url . ', skipped' . PHP_EOL;
    continue;
}

$mdb_pl_dd = initialize_domdoc( $mdb_pl_src );
$mdb_pl_xp = new DOMXpath( $mdb_pl_dd );
// Contact containers for this deputy: legacy list and a fresh contact template.
$mdb_cntc_adr_arr = array();
$kontakt_arr      = $profil_url_daten_obj->kontakt;

// Phone and fax of the Berlin office. Both default to '' so that a profile
// without a "T:" paragraph does not inherit the previous deputy's numbers.
$bb_tel = '';
$bb_fax = '';

$mdb_pl_ba = $mdb_pl_xp->query(
    "//div[contains( @id, 'abgeordnete_links' )]/p[contains( @class, 'bodytext' )]"
);
foreach ( $mdb_pl_ba as $mp_ba ) {
    $mp_ba_val_str = $mp_ba->nodeValue;

    // Only paragraphs carrying a "T:" marker hold the numbers.
    if ( false === strpos( $mp_ba_val_str, 'T:' ) ) {
        continue;
    }

    // Text before the first "F" is the phone part, the next part the fax.
    $mp_ba_val_arr = explode( 'F', $mp_ba_val_str );
    foreach ( $mp_ba_val_arr as $part_key => $part_val ) {
        // Strip letters and the separators , . : / - and collapse whitespace.
        // The pattern runs in UTF-8 mode with \p{L}: the former byte-wise
        // class removed single bytes of multibyte letters, which made the
        // following /u replacement fail and emptied the number.
        $part_val = preg_replace( '/[\p{L},.:\/\-]/u', '', $part_val );
        $mp_ba_val_arr[ $part_key ] = trim( preg_replace( '/\s+/u', ' ', $part_val ) );
    }

    // explode() always yields index 0; the fax part is optional.
    $bb_tel = $mp_ba_val_arr[0];
    $bb_fax = isset( $mp_ba_val_arr[1] ) ? $mp_ba_val_arr[1] : '';
}
// Every deputy gets the same Berlin office address; only the addressee line
// and the contact details differ.
$bb_anschrift = $mdb_vn . ' ' . $mdb_nn
    . PHP_EOL . 'Platz der Republik' . ' ' . '1'
    . PHP_EOL . '11011' . ' ' . 'Berlin';

// Legacy-layout record.
$bb_adr_data = array(
    'Bezeichnung' => 'Berliner Büro',
    'Adresse'     => array(
        'frag' => array(
            'Straße'       => 'Platz der Republik',
            'Hausnummer'   => '1',
            'Postleitzahl' => '11011',
            'Ort'          => 'Berlin'
        ),
        'full' => $bb_anschrift,
    ),
    'Kontakt'     => array(
        'Mail'    => $buero_mail,
        'Telefon' => $bb_tel,
        'Fax'     => $bb_fax,
    ),
);
$mdb_cntc_adr_arr[] = $bb_adr_data;

// Same data in the layout of the contact template.
$kontakt_arr[ 'bezeichnung' ] = 'Berliner Büro';
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ]      = 'Platz der Republik';
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ]   = '1';
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ] = '11011';
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ]          = 'Berlin';
$kontakt_arr[ 'adresse' ][ 'komplett' ] = $bb_anschrift;
$kontakt_arr[ 'eda' ][ 'mail' ]    = $buero_mail;
$kontakt_arr[ 'eda' ][ 'telefon' ] = $bb_tel;
$kontakt_arr[ 'eda' ][ 'fax' ]     = $bb_fax;
$mdb_data_arr[ 'kontakte' ][] = $kontakt_arr;
//$wk_adr_data = array();
// Select every "bodytext" paragraph below the "wk-kontakt" box of the
// "parlament" tab on the profile page. Each paragraph is examined separately
// by the loop that follows.
$mdb_pl_wk_cntc = $mdb_pl_xp->query(
"//div[contains( @id, 'block_50_right' )]
/div[contains( @id, 'tabs' )]
/div[contains( @id, 'parlament' )]
/div[contains( @class, 'wk-kontakt' )]
//p[contains( @class, 'bodytext' )]"
);
//print_r( $mdb_pl_wk_cntc );
foreach ( $mdb_pl_wk_cntc as $key => $mp_wk_cntc ) {
//print_r( $mp_wk_cntc );
//print_r( $mp_wk_cntc->nodeValue );
//print_r( $key );
// Per-paragraph state: empty legacy office record and a fresh contact template.
$mwcn_arr_buero = array();
$kontakt_arr    = $profil_url_daten_obj->kontakt;

// Collect the value of every text node below the paragraph, in the order the
// query returns them.
$mwcn_arr = array();
foreach ( $mdb_pl_xp->query( './/node()', $mp_wk_cntc ) as $mwcn ) {
    if ( ! isset( $mwcn->nodeType ) || XML_TEXT_NODE != $mwcn->nodeType ) {
        continue;
    }
    $mwcn_arr[] = $mwcn->nodeValue;
}
//print_r( $mwcn_arr );
// Hand-written corrections for individual profile pages, selected by surname
// and by the index ($key) of the contact paragraph being processed. They either
// prepend a synthetic 'BÜRO' first line (which makes the "bür" test on the
// first element in the office check that follows succeed), merge the text of
// a neighbouring paragraph into this one, or skip the paragraph altogether.
// NOTE(review): all indices are hard-coded, presumably against the page markup
// at the time of writing - verify against current pages.
if ( $mdb_nn == 'Hofreiter' && $key == 2 ) {
array_unshift( $mwcn_arr, 'BÜRO' );
//print_r( $mwcn_arr );
//die;
} elseif ( $mdb_nn == 'Verlinden' && $key == 1 ) {
array_unshift( $mwcn_arr, 'BÜRO' );
} elseif ( $mdb_nn == 'Kekeritz' && $key == 1 ) {
// Paragraph is ignored entirely.
continue;
} elseif ( $mdb_nn == 'Krischer' && $key == 1 ) {
// Paragraph is ignored entirely.
continue;
} elseif ( $mdb_nn == 'Özdemir' && $key == 1 ) {
// Prepend the heading, then append the text nodes of paragraph index 2 so
// that both paragraphs are parsed as one office.
// NOTE(review): paragraph index 2 is not skipped afterwards and is therefore
// processed again on its own in a later iteration - confirm this is intended.
array_unshift( $mwcn_arr, 'BÜRO' );
//print_r($mdb_pl_wk_cntc->item(2));
$mp_wk_cntc_nodes_tmp = $mdb_pl_xp->query(
'.//node()',
$mdb_pl_wk_cntc->item(2)
);
$mwcn_arr_tmp = array();
foreach ( $mp_wk_cntc_nodes_tmp as $mwcn_tmp ) {
//print_r( $mwcn );
// nodeType 3 is a text node.
if ( isset( $mwcn_tmp->nodeType ) && $mwcn_tmp->nodeType == 3 ) {
//print_r( $mwcn->nodeValue );
$mwcn_val_tmp = $mwcn_tmp->nodeValue;
$mwcn_arr_tmp[] = $mwcn_val_tmp;
}
}
//print_r($mwcn_arr_tmp);
foreach ( $mwcn_arr_tmp as $matmp) {
array_push( $mwcn_arr, $matmp );
}
//print_r($mwcn_arr);
//die;
} elseif ( $mdb_nn == 'Rüffer' ) {
if ( $key == 2 ) {
// Put the text nodes of paragraph index 1 in front of this paragraph's lines.
$mp_wk_cntc_nodes_tmp = $mdb_pl_xp->query(
'.//node()',
$mdb_pl_wk_cntc->item(1)
);
$mwcn_arr_tmp = array();
foreach ( $mp_wk_cntc_nodes_tmp as $mwcn_tmp ) {
//print_r( $mwcn );
// nodeType 3 is a text node.
if ( isset( $mwcn_tmp->nodeType ) && $mwcn_tmp->nodeType == 3 ) {
//print_r( $mwcn->nodeValue );
$mwcn_val_tmp = $mwcn_tmp->nodeValue;
$mwcn_arr_tmp[] = $mwcn_val_tmp;
}
}
//print_r($mwcn_arr_tmp);
// Reversing before the repeated array_unshift() keeps the original order
// of the inserted lines.
$mwcn_arr_tmp = array_reverse( $mwcn_arr_tmp );
foreach ( $mwcn_arr_tmp as $matmp) {
array_unshift( $mwcn_arr, $matmp );
}
//print_r($mwcn_arr_tmp);
array_unshift( $mwcn_arr, 'BÜRO' );
// Elements 6 and 7 are joined into one string (stored as the mail value),
// elements 5 to 8 are removed and the joined string is appended at the end.
$mail_tmp = $mwcn_arr[ 6 ] . $mwcn_arr[ 7 ];
unset($mwcn_arr[ 5 ]);
unset($mwcn_arr[ 6 ]);
unset($mwcn_arr[ 7 ]);
unset($mwcn_arr[ 8 ]);
array_push( $mwcn_arr, $mail_tmp );
//print_r($mwcn_arr);
//die;
}
// Paragraph index 3 is ignored for this deputy.
if ( $key == 3 ) continue;
} elseif ( $mdb_nn == 'Strengmann-Kuhn' && $key == 1 ) {
array_unshift( $mwcn_arr, 'BÜRO' );
}
// A paragraph is treated as an office block when its first text line contains
// "bür" (Büro, Wahlkreisbüro, ...) or when it has more than two text lines.
// The first line is read defensively: a paragraph without text nodes has none.
$mwcn_heading = isset( $mwcn_arr[ 0 ] ) ? $mwcn_arr[ 0 ] : '';
if (
    preg_match( '/\b(\w*bür\w*)\b/iu', $mwcn_heading )
    || count( $mwcn_arr ) > 2
) {
    // Drop the heading line and every fragment shorter than 7 bytes.
    unset( $mwcn_arr[ 0 ] );
    foreach ( $mwcn_arr as $mwcn_key => $mwcn_val ) {
        if ( strlen( $mwcn_val ) < 7 ) {
            unset( $mwcn_arr[ $mwcn_key ] );
        }
    }

    // Lines that make up the office record. Always start from an empty list:
    // it used to be created only when more than two lines were left, so a
    // shorter paragraph re-used the lines of the previously parsed office.
    $mwcn_arr_buero_tmp = array();
    $mwcn_ac = count( $mwcn_arr );
    if ( $mwcn_ac > 2 ) {
        foreach ( $mwcn_arr as $mwcn_val ) {
            $mwcn_val_cl = preg_replace( '/^\s+|\n|\r|\s+$/u', '', $mwcn_val );
            // A line starting with "Mitarbeit..." ends the office block.
            if ( preg_match( '/^\b(mitarbeit\w*)/iu', $mwcn_val_cl ) ) {
                break;
            }
            // Lines matching this pattern are not part of address or contact
            // data and are left out.
            if (
                preg_match(
                    '/^\b(mo\-|für\w*)|(.*?)(\(zugang\w*)|^\b(hannover|dannenberg|lüneburg)\b|^(haus der|bad hers\w*)/iu',
                    $mwcn_val_cl
                )
            ) {
                continue;
            }
            $mwcn_arr_buero_tmp[] = $mwcn_val_cl;
        }
    }

    $mabt_mail_arr = array();
    $mabt_tel_arr  = array();
    $mabt_fax_arr  = array();
    $mabt_str  = '';
    $mabt_hnr  = '';
    $mabt_plz  = '';
    $mabt_ort  = '';
    $mabt_mail = '';
    $mabt_tel  = '';
    $mabt_fax  = '';

    // Line 0: street and house number, line 1: postal code and town,
    // remaining lines: phone, fax and mail in any order.
    foreach ( $mwcn_arr_buero_tmp as $mabt_key => $mabt_val ) {
        if ( $mabt_key == 0 ) {
            // Split in front of the first digit.
            $str_hnr_arr = preg_split( '/(?=\d)/', $mabt_val, 2 );
            array_trim( $str_hnr_arr );
            // A street line without a digit yields no second element.
            $mabt_str = isset( $str_hnr_arr[ 0 ] ) ? $str_hnr_arr[ 0 ] : '';
            $mabt_hnr = isset( $str_hnr_arr[ 1 ] ) ? $str_hnr_arr[ 1 ] : '';
        } elseif ( $mabt_key == 1 ) {
            if ( $mdb_nn == 'Wilms' ) {
                // Page-specific correction: drop the first two bytes.
                $mabt_val = substr( $mabt_val, 2 );
            }
            // Split at the first position that is not followed by a digit.
            $plz_ort_arr = preg_split( '/(?!\d)/', $mabt_val, 2 );
            array_trim( $plz_ort_arr );
            $mabt_plz = isset( $plz_ort_arr[ 0 ] ) ? $plz_ort_arr[ 0 ] : '';
            $mabt_ort = isset( $plz_ort_arr[ 1 ] ) ? $plz_ort_arr[ 1 ] : '';
        } else {
            // NOTE(review): the "." in "T." / "F." below is unescaped and
            // matches any character, so every line starting with "T" or "F"
            // plus one more character is taken as phone or fax - confirm.
            if ( preg_match( '/^(T.|T:|T+|Tel)/', $mabt_val ) ) {
                // Remove the marker, then everything that is not a digit,
                // then all whitespace.
                $mabt_tel_raw = preg_replace( '/^(T.|T:|T+|Tel)/', '', $mabt_val );
                $mabt_tel_raw = preg_replace( '/([^0-9|\s*])/', '', $mabt_tel_raw );
                $mabt_tel_arr[] = trim( preg_replace( '/(\s+)/', '', $mabt_tel_raw ) );
                // Only the first number found is exported.
                $mabt_tel = $mabt_tel_arr[ 0 ];
            }
            if ( preg_match( '/^(F.|F:|F+|Fax)/', $mabt_val ) ) {
                $mabt_fax_raw = preg_replace( '/^(F.|F:|F+|Fax)/', '', $mabt_val );
                $mabt_fax_raw = preg_replace( '/([^0-9|\s*])/', '', $mabt_fax_raw );
                $mabt_fax_arr[] = trim( preg_replace( '/(\s+)/', '', $mabt_fax_raw ) );
                $mabt_fax = $mabt_fax_arr[ 0 ];
            }
            if ( preg_match( '/(\@)/', $mabt_val ) ) {
                $mabt_mail_arr[] = strtolower( $mabt_val );
                $mabt_mail = $mabt_mail_arr[ 0 ];
            }
            if ( preg_match( '/(\(at\))/', $mabt_val ) ) {
                // Obfuscated address: turn "(at)" into "@" and drop whitespace.
                $mabt_mail_arr[] = strtolower(
                    preg_replace(
                        '/(\s*)/',
                        '',
                        preg_replace( '/(\(at\))/', '@', $mabt_val )
                    )
                );
                $mabt_mail = $mabt_mail_arr[ 0 ];
            }
        }
    }

    // Postal address block shared by both output layouts.
    $mabt_anschrift = $mdb_vn . ' ' . $mdb_nn
        . PHP_EOL . $mabt_str . ' ' . $mabt_hnr
        . PHP_EOL . $mabt_plz . ' ' . $mabt_ort;

    // Legacy-layout record.
    $mwcn_arr_buero[ 'Bezeichnung' ] = 'Wahlkreisbüro';
    $mwcn_arr_buero[ 'Adresse' ][ 'full' ] = $mabt_anschrift;
    $mwcn_arr_buero[ 'Adresse' ][ 'frag' ] = array(
        'Straße'       => $mabt_str,
        'Hausnummer'   => $mabt_hnr,
        'Postleitzahl' => $mabt_plz,
        'Ort'          => $mabt_ort,
    );
    $mwcn_arr_buero[ 'Kontakt' ] = array(
        'Mail'    => $mabt_mail,
        'Telefon' => $mabt_tel,
        'Fax'     => $mabt_fax,
    );

    // Same data in the layout of the contact template.
    $kontakt_arr[ 'bezeichnung' ] = 'Wahlkreisbüro';
    $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ]      = $mabt_str;
    $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ]   = $mabt_hnr;
    $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ] = $mabt_plz;
    $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ]          = $mabt_ort;
    $kontakt_arr[ 'adresse' ][ 'komplett' ] = $mabt_anschrift;
    $kontakt_arr[ 'eda' ][ 'mail' ]    = $mabt_mail;
    $kontakt_arr[ 'eda' ][ 'telefon' ] = $mabt_tel;
    $kontakt_arr[ 'eda' ][ 'fax' ]     = $mabt_fax;
    $mdb_data_arr[ 'kontakte' ][] = $kontakt_arr;

    // The legacy record always carries at least its 'Bezeichnung'.
    $mdb_cntc_adr_arr[] = $mwcn_arr_buero;
}
}
// Constituency number: read from the query string of the "Wahlkreis" link in
// the "wk-info" box (expected form "<name>=<number>").
$wahl_arr = array();
$mdb_pl_wk_info = $mdb_pl_xp->query(
    "//div[contains( @id, 'block_50_right' )]"
    . "/div[contains( @id, 'tabs' )]"
    . "/div[contains( @id, 'parlament' )]"
    . "/div[contains( @class, 'wk-info' )]"
    . "//a[contains( text(), 'Wahlkreis' )]"
);
foreach ( $mdb_pl_wk_info as $mp_wk_info ) {
    // parse_url() yields null (no query) or false (malformed URL); cast so
    // that explode() always receives a string.
    $mp_wk_info_wknr_para = (string) parse_url(
        $mp_wk_info->getAttribute( 'href' ),
        PHP_URL_QUERY
    );
    $mp_wk_info_wknr_arr = explode( '=', $mp_wk_info_wknr_para );

    // Skip links whose query has no name or no "=": the value part would be
    // an undefined offset otherwise.
    if ( empty( $mp_wk_info_wknr_arr[ 0 ] ) || ! isset( $mp_wk_info_wknr_arr[ 1 ] ) ) {
        continue;
    }

    $wahl_arr[ 'unsp' ] = array(
        $mp_wk_info_wknr_arr[ 0 ] => $mp_wk_info_wknr_arr[ 1 ],
    );
    $mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'wahlkreisnummer' ] = $mp_wk_info_wknr_arr[ 1 ];
}
// Classify the links of the profile's link list. Every link is stored twice:
// in the legacy layout ($linkliste_arr) and in the record that is written to
// disk ($mdb_data_arr[ 'verweise' ]).
$linkliste_arr = array();

// One rule per row: pattern tested against the link text, key path inside
// $linkliste_arr, key path inside $mdb_data_arr[ 'verweise' ]. Rows are tried
// from top to bottom and the first match wins, so the order is significant.
// Keeping both targets in one table prevents the two layouts from drifting
// apart (the homepage rule used to store the legacy record in the new layout).
// NOTE(review): in '/google+/' the "+" is a quantifier, so the rule matches
// any text containing "google" - confirm whether a literal "+" was intended.
$link_rules = array(
    array( '/^homepage|website|blog/iu', array( 'Homepage' ), array( 'persoenlicheseite' ) ),
    array( '/^youtube/iu', array( 'SozialeNetzwerke', 'Youtube' ), array( 'sozialenetzwerke', 'Youtube' ) ),
    array( '/facebook/iu', array( 'SozialeNetzwerke', 'Facebook' ), array( 'sozialenetzwerke', 'Facebook' ) ),
    array( '/twitter/iu', array( 'SozialeNetzwerke', 'Twitter' ), array( 'sozialenetzwerke', 'Twitter' ) ),
    array( '/google+/iu', array( 'SozialeNetzwerke', 'Google+' ), array( 'sozialenetzwerke', 'Google+' ) ),
    array( '/flickr/iu', array( 'SozialeNetzwerke', 'flickr' ), array( 'sozialenetzwerke', 'flickr' ) ),
    array( '/xing/iu', array( 'SozialeNetzwerke', 'xing' ), array( 'sozialenetzwerke', 'xing' ) ),
    array( '/meinvz/iu', array( 'SozialeNetzwerke', 'meinvz' ), array( 'sozialenetzwerke', 'meinvz' ) ),
    array( '/studivz/iu', array( 'SozialeNetzwerke', 'studivz' ), array( 'sozialenetzwerke', 'studivz' ) ),
    array( '/friendfeed/iu', array( 'SozialeNetzwerke', 'friendfeed' ), array( 'sozialenetzwerke', 'friendfeed' ) ),
    array( '/^Reden im Videoarchiv des Bundestags/iu', array( 'Bundestag', 'Reden' ), array( 'bundestag', 'reden' ) ),
    array( '/Porträt auf|Porträt \(|Porträt bei/iu', array( 'Bundestag', 'Profil' ), array( 'bundestag', 'profil' ) ),
    array( '/abgeordnetenwatch/iu', array( 'Sonstige', 'spezifisch', 'abgeordnetenwatch' ), array( 'sonstige', 'spezifisch', 'abgeordnetenwatch' ) ),
    array( '/Veröffentlichungspflichtige|Veröffentlichungspflichte/iu', array( 'Sonstige', 'spezifisch', 'VOA' ), array( 'sonstige', 'spezifisch', 'VOA' ) ),
    array( '/leichter sprache/iu', array( 'Sonstige', 'spezifisch', 'leichtesprache' ), array( 'sonstige', 'spezifisch', 'leichtesprache' ) ),
    array( '/gebärdensprache/iu', array( 'Sonstige', 'spezifisch', 'gebärdensprache' ), array( 'sonstige', 'spezifisch', 'gebärdensprache' ) ),
    array( '/^Landesgruppe/iu', array( 'Sonstige', 'bereichsspezifisch', 'Landesgruppe' ), array( 'sonstige', 'bereich', 'Landesgruppe' ) ),
    array( '/\.net|\.eu|\.com|\.de$/iu', array( 'Homepage' ), array( 'persoenlicheseite' ) ),
);

$mdb_pl_ll_lis = $mdb_pl_xp->query(
    "//div[contains( @id, 'block_25_right' )]/div[contains( @id, 'links' )]/ul/li"
);
foreach ( $mdb_pl_ll_lis as $mp_ll_li ) {
    $mp_ll_li_val = trim( $mp_ll_li->nodeValue );

    // Reset per item: an <li> without an anchor must not inherit the href of
    // the previous item. With several anchors the last one wins.
    $mp_ll_li_href = '';
    foreach ( $mdb_pl_xp->query( './/a', $mp_ll_li ) as $mlll ) {
        $mp_ll_li_href = $mlll->getAttribute( 'href' );
    }

    // Legacy record and template-based record for the same link.
    $link_arr = array(
        'txt_val' => $mp_ll_li_val,
        'href'    => $mp_ll_li_href,
    );
    $verw_arr = $profil_url_daten_obj->verweisbasis;
    $verw_arr[ 'text' ] = $mp_ll_li_val;
    $verw_arr[ 'href' ] = $mp_ll_li_href;

    // Unmatched links are filed under their own text.
    $legacy_path  = array( 'Sonstige', 'Unsortiert', $mp_ll_li_val );
    $verweis_path = array( 'sonstige', 'unsortiert', $mp_ll_li_val );
    foreach ( $link_rules as $link_rule ) {
        if ( preg_match( $link_rule[ 0 ], $mp_ll_li_val ) ) {
            $legacy_path  = $link_rule[ 1 ];
            $verweis_path = $link_rule[ 2 ];
            break;
        }
    }

    // Walk down each key path by reference and store the record at its end.
    $link_slot = &$linkliste_arr;
    foreach ( $legacy_path as $path_key ) {
        $link_slot = &$link_slot[ $path_key ];
    }
    $link_slot = $link_arr;
    unset( $link_slot );

    $link_slot = &$mdb_data_arr[ 'verweise' ];
    foreach ( $verweis_path as $path_key ) {
        $link_slot = &$link_slot[ $path_key ];
    }
    $link_slot = $verw_arr;
    unset( $link_slot );
}
/***/
// Date and place of birth are read from the first "bodytext" paragraph of the
// "vita" box.
$biogra_arr = array();
$mdb_pl_ad_xp_bg = $mdb_pl_xp->query(
    '//div[contains( @id, "vita" )]/p[contains( @class, "bodytext" )][1]'
);
foreach ( $mdb_pl_ad_xp_bg as $mpax_bg ) {
    $mpax_bg_val = $mpax_bg->nodeValue;

    // Date: from the first one- or two-digit number up to the next four-digit
    // number. Only stored when the pattern matched; reading the match array
    // unconditionally hit an undefined offset and stored null.
    $biogra_arr[ 'Geburtsdatum' ] = '';
    if ( preg_match( '/(\d{1,2}(.*?)(\d{4}))/', $mpax_bg_val, $geb_dat_arr ) ) {
        $biogra_arr[ 'Geburtsdatum' ] = $geb_dat_arr[ 0 ];
        $mdb_data_arr[ 'biografie' ][ 'geburtsdatum' ] = $geb_dat_arr[ 0 ];
    }

    // Place: the single word following " in ".
    $biogra_arr[ 'Geburtsort' ] = '';
    preg_match( '/(?:\sin\s)(\b\w*\b)/u', $mpax_bg_val, $geb_ort_arr );
    if ( isset( $geb_ort_arr[ 1 ] ) ) {
        $biogra_arr[ 'Geburtsort' ] = $geb_ort_arr[ 1 ];
        $mdb_data_arr[ 'biografie' ][ 'geburtsort' ] = $geb_ort_arr[ 1 ];
    }
}
/***/
// Finish the record for this deputy: store the profile URL and append the
// record to the list that is written to disk after the loop.
$mdb_data_arr[ 'url' ] = $mdb_pl_url;
$mdb_db[] = $mdb_data_arr;
// Legacy-layout record built from the intermediate arrays of this iteration.
// $mdb_db_tmp is only referenced by the commented-out debug output below.
$mdb_db_tmp[] = array(
'Name' => $name_arr,
'Kontaktdaten' => $mdb_cntc_adr_arr,
'Fraktion' => $ffkt_fl_arr,
'Wahl' => $wahl_arr,
'Links' => $linkliste_arr,
'Bio' => $biogra_arr,
'PURL' => $mdb_pl_url,
);
}
// print_r($mdb_db);die;
// print_r($mdb_db_tmp);die;
// Persist the collected records twice: as a PHP array literal and as JSON.
file_put_contents( $db_file_arr, var_export( $mdb_db, true ) );
// json_encode() takes a flags bitmask as second argument, not a boolean. The
// former `true` was coerced to 1, i.e. JSON_HEX_TAG; the flag is now spelled
// out so the output stays the same and the intent is visible.
$mdb_db_json = json_encode( $mdb_db, JSON_HEX_TAG );
if ( false === $mdb_db_json ) {
    // Do not overwrite an existing export with an empty file on failure.
    echo PHP_EOL . 'json export failed: ' . json_last_error_msg() . PHP_EOL;
} else {
    file_put_contents( $db_file_arr . '.json', $mdb_db_json );
}
parser-linke.php
<?php
// $profil_url_daten_obj = new ProfilURLDaten();
// //print_r( $profil_url_daten_obj );
// $profil_url_daten_obj->pud_dump();
// die;
// Result containers for this parser run.
$mdb_db = array();
$mdb_db_tmp = array();
$mdb_tmp = array();
// Base name of the output file.
$db_file_arr = 'tmp/linke_mdb_db';
// Base URL and parsed list page of the directory currently being processed
// (presumably filled in by setup_current_arr_ele() - confirm).
$baseurl = $data[ 'current' ][ 'baseurl' ];
$mdb_lst_pg_dd = $data[ 'current' ][ 'domdoc' ];
$mdb_lst_xp = new DOMXpath( $mdb_lst_pg_dd );
// Every element whose class contains "listenElement" is one list entry.
$mdb_lst_divs = $mdb_lst_xp->query(
'//div[contains( @class, "listenElement" )]'
);
//print_r($mdb_lst_divs);
foreach ( $mdb_lst_divs as $mdb_lst_div ) {
//print_r($mdb_lst_div);
$profil_url_daten_obj = new ProfilURLDaten();
$mdb_data_arr = $profil_url_daten_obj->profil_url_daten;
$name_from_list = $mdb_lst_xp->query(
'.//a[1]',
$mdb_lst_div
);
// print_r($name_from_list);
foreach ( $name_from_list as $nfl ) {
//print_r( $nfl->getAttribute( 'title' ) . PHP_EOL );
//print_r( $nfl->getAttribute( 'href' ) . PHP_EOL );
$name_arr = array();
$nfl_val = $nfl->getAttribute( 'title' );
$mdb_pl_href = $nfl->getAttribute( 'href' );
//print_r( $mdb_pl_href . PHP_EOL );
// Clean Up Whitespaces
$nfl_str = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $nfl_val );
//print_r($nfl_str . PHP_EOL );
$nfl_str = preg_replace( '/([A-Z])(\.)/', '$1', $nfl_str );
//print_r($nfl_str . PHP_EOL );
// Separate Title from Name
$nfl_ti_fn_nn_arr = explode( '.', $nfl_str );
$nfl_fn_nn_str = trim( array_pop( $nfl_ti_fn_nn_arr ) );
// Create Array with NN FN
$nfl_fn_nn_arr = explode( ' ', $nfl_fn_nn_str );
//print_r($nfl_fn_nn_arr);
// Derive $mdb_vn (given name) and $mdb_nn (surname) from the space-separated
// name tokens in $nfl_fn_nn_arr. Names with more than two tokens are resolved
// by searching the cleaned title string $nfl_str for known name particles.
if ( count($nfl_fn_nn_arr) > 2 ) {
// Particle "de", "Graf" or "van": tokens 1 and 2 together form the surname.
if ( preg_match( '/\b(\sde|Graf|van\s)\b/', $nfl_str ) ) {
$mdb_nn = trim( $nfl_fn_nn_arr[1] . ' ' . $nfl_fn_nn_arr[2] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
} elseif ( preg_match( '/\b(\svon\s)\b/', $nfl_str ) ) {
// "von der" / "Freiherr von": the surname spans tokens 1 to 3.
// NOTE(review): token 3 is read without checking that it exists.
if ( preg_match( '/\b(\svon der\s|\sFreiherr von\s)\b/', $nfl_str ) ) {
$mdb_nn = trim( $nfl_fn_nn_arr[1] . ' ' . $nfl_fn_nn_arr[2] . ' ' . $nfl_fn_nn_arr[3] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
// Plain "von": surname is tokens 1 and 2. This condition repeats the
// enclosing elseif, so it always holds when this point is reached.
} elseif ( preg_match( '/\b(\svon\s)\b/', $nfl_str ) ) {
$mdb_nn = trim( $nfl_fn_nn_arr[1] . ' ' . $nfl_fn_nn_arr[2] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
}
} else {
// No particle found: token 2 is the surname, tokens 0 and 1 are given names.
// NOTE(review): any tokens after index 2 are ignored here.
$mdb_nn = trim( $nfl_fn_nn_arr[2] );
// A one-character second token is treated as an initial and gets its dot
// back (the dots after capital letters were stripped from $nfl_str earlier).
if ( strlen( $nfl_fn_nn_arr[1] ) == 1 ) {
$mdb_vn = trim( $nfl_fn_nn_arr[0] . ' ' . $nfl_fn_nn_arr[1] . '.' );
} else {
$mdb_vn = trim( $nfl_fn_nn_arr[0] . ' ' . $nfl_fn_nn_arr[1] );
}
}
} else {
// Two tokens or fewer: token 0 is the given name, token 1 the surname.
// NOTE(review): with a single token, index 1 does not exist.
$mdb_nn = trim( $nfl_fn_nn_arr[1] );
$mdb_vn = trim( $nfl_fn_nn_arr[0] );
}
//print_r( $mdb_vn . ' ' . $mdb_nn . PHP_EOL );
$nfl_ti_arr = $nfl_ti_fn_nn_arr;
$mdb_ti = '';
if ( ! empty( $nfl_ti_arr ) ) {
foreach ( $nfl_ti_arr as $title_ele ) {
$mdb_ti .= $title_ele . '.';
}
}
$name_arr = array(
'Vorname' => $mdb_vn,
'Nachname' => $mdb_nn,
'Titel' => $mdb_ti,
);
//print_r($name_arr);
$mdb_data_arr[ 'name' ][ 'nachname' ] = $mdb_nn;
$mdb_data_arr[ 'name' ][ 'vorname' ] = $mdb_vn;
$mdb_data_arr[ 'name' ][ 'adelszusatz' ] = '';
$mdb_data_arr[ 'name' ][ 'titel' ] = $mdb_ti;
}
$mdb_pl_url = $baseurl . $mdb_pl_href;
//print_r( $mdb_pl_url . PHP_EOL );
$src_file_name = basename( pathinfo( $mdb_pl_url )['dirname'] );
$src_path = 'tmp/linke/src/abg/';
$src_file = $src_path . $src_file_name;
// Load the profile page from the local cache file, downloading it on a miss.
// An empty cache file counts as a miss: a failed download used to be written
// out as an empty file and was then reused on every later run.
$mdb_pl_src = is_file( $src_file ) ? file_get_contents( $src_file ) : false;
if ( $mdb_pl_src === false || $mdb_pl_src === '' ) {
    $mdb_pl_src = file_get_contents( $mdb_pl_url );
    if ( $mdb_pl_src === false || $mdb_pl_src === '' ) {
        // Nothing to parse: report it and move on to the next list entry
        // instead of caching and parsing an empty document.
        error_log( 'mdb-parser: could not fetch ' . $mdb_pl_url . ' - skipping profile' );
        continue;
    }
    file_put_contents(
        $src_file,
        $mdb_pl_src
    );
}
//$mdb_pl_src = file_get_contents( $mdb_pl_url );
$mdb_pl_dd = initialize_domdoc( $mdb_pl_src );
$mdb_pl_xp = new DOMXpath( $mdb_pl_dd );
$linkliste_arr = array();
$mdb_pl_lk_el = $mdb_pl_xp->query(
"//div[contains( @class, 'elemTeaser' )]
//a[contains( @class, 'linkMehr' )]"
);
//print_r( $mdb_pl_lk_el );
foreach ( $mdb_pl_lk_el as $mp_lk_el ) {
//print_r( $mp_lk_el );
//$mp_lk_val_str = $mp_lk_el->nodeValue;
//print_r( $mp_lk_val_str . PHP_EOL );
$mp_lk_val_href = $mp_lk_el->getAttribute( 'href' );
//print_r( $mp_lk_val_href . PHP_EOL );
$linkliste_arr[ 'Homepage' ] = $mp_lk_val_href;
$verw_arr = $profil_url_daten_obj->verweisbasis;
$verw_arr[ 'text' ] = 'Persönliche Seite';
$verw_arr[ 'href' ] = $mp_lk_val_href;
$mdb_data_arr[ 'verweise' ][ 'persoenlicheseite' ] = $verw_arr;
}
$biogra_arr = array();
$mdb_pl_bg_els = $mdb_pl_xp->query(
"//div[contains( @class, 'mdbProfil' )]
//div[contains( @class, 'mdbKopf' )]
//div[contains( @class, 'mdbDetails' )]"
);
//print_r( $mdb_pl_bg_els );
foreach ( $mdb_pl_bg_els as $mp_bg_el ) {
//print_r( $mp_bg_el );
//$mp_bg_val_str = $mp_bg_el->nodeValue;
//print_r( $mp_bg_val_str . PHP_EOL );
$mp_bg_el_gebd = $mdb_pl_xp->query(
'.//span[contains( @class, "mdbGeburtstag" )]',
$mp_bg_el
);
// Extract the birth date from each "mdbGeburtstag" span: the shortest
// substring that starts with 1-2 digits and ends with 4 digits.
foreach ( $mp_bg_el_gebd as $mp_gebd ) {
    $mp_gebd_val = $mp_gebd->nodeValue;
    // Default for the temporary structure; only overwritten on a real match.
    $biogra_arr[ 'Geburtsdatum' ] = '';
    // Check the match result: reading $geb_dat_arr[ 0 ] after a failed match
    // is an undefined-index access and stored null as the birth date.
    if ( preg_match( '/(\d{1,2}(.*?)(\d{4}))/', $mp_gebd_val, $geb_dat_arr ) ) {
        $biogra_arr[ 'Geburtsdatum' ] = $geb_dat_arr[ 0 ];
        $mdb_data_arr[ 'biografie' ][ 'geburtsdatum' ] = $geb_dat_arr[ 0 ];
    }
}
$mp_bg_el_ber = $mdb_pl_xp->query(
'.//span[contains( @class, "mdbBeruf" )]',
$mp_bg_el
);
foreach ( $mp_bg_el_ber as $mp_ber ) {
//print_r($mp_ber);
$mp_ber_val = $mp_ber->nodeValue;
$mp_ber_val = trim(
preg_replace( '/^Beruf:\s/', '', $mp_ber_val )
);
//print_r( $mp_ber_val . PHP_EOL );
$mp_ber_arr = explode( ',', $mp_ber_val );
$mp_ber_arr = array_filter( $mp_ber_arr );
array_trim( $mp_ber_arr );
//print_r( $mp_ber_arr );
$biogra_arr[ 'Beruf' ] = $mp_ber_arr;
$mdb_data_arr[ 'biografie' ][ 'beruf' ] = $mp_ber_arr;
}
$mp_bg_el_mdt = $mdb_pl_xp->query(
'.//div[contains( @class, "mdbMandat" )]',
$mp_bg_el
);
foreach ( $mp_bg_el_mdt as $mp_mdt ) {
//print_r($mp_mdt);
$mp_mdt_val = $mp_mdt->nodeValue;
//print_r( $mp_mdt_val . PHP_EOL );
if ( preg_match( '/^Landesliste/', $mp_mdt_val, $ll_match ) ) {
//print_r( $ll_match );
$mp_mdt_arr = explode( ' ', $mp_mdt_val );
//print_r( $mp_mdt_arr );
$biogra_arr[ 'Mandat' ] = array(
'Typ' => $mp_mdt_arr[ 0 ],
'Bundesland' => $mp_mdt_arr[ 1 ],
);
$mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'typ' ] = $mp_mdt_arr[ 0 ];
$mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'bundesland' ] = $mp_mdt_arr[ 1 ];
} elseif ( preg_match( '/^Direktmandat/', $mp_mdt_val, $dm_match ) ) {
//print_r( $dm_match );
$mp_mdt_arr = explode( ' ', $mp_mdt_val );
//print_r( $mp_mdt_arr );
$biogra_arr[ 'Mandat' ] = array(
'Typ' => $mp_mdt_arr[ 0 ],
'wknr' => $mp_mdt_arr[ 3 ],
);
$mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'typ' ] = $mp_mdt_arr[ 0 ];
$mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'wahlkreisnummer' ] = $mp_mdt_arr[ 3 ];
}
//$biogra_arr[ 'Mandat' ] = $mp_mdt_arr;
}
$mp_bg_el_fkt = $mdb_pl_xp->query(
'.//div[contains( @class, "mdbFunktion" )]',
$mp_bg_el
);
foreach ( $mp_bg_el_fkt as $mp_fkt ) {
//print_r($mp_fkt);
$mp_fkt_val = $mp_fkt->nodeValue;
//print_r( $mp_fkt_val . PHP_EOL );
$biogra_arr[ 'Funktion' ] = $mp_fkt_val;
$mdb_data_arr[ 'politik' ][ 'fraktion' ][ 'funktionen' ] = $mp_fkt_val;
}
$mp_bg_el_mgl = $mdb_pl_xp->query(
'.//span[last()]',
$mp_bg_el
);
foreach ( $mp_bg_el_mgl as $mp_mgl ) {
//print_r($mp_mgl);
$mp_mgl_val = $mp_mgl->nodeValue;
//print_r( $mp_mgl_val . PHP_EOL );
preg_match_all( '/(\d{1,2})/', $mp_mgl_val, $geb_mgl_arr );
//print_r( $geb_mgl_arr );
$biogra_arr[ 'MitgliedWahlperioden' ] = $geb_mgl_arr[ 1 ];
$mdb_data_arr[ 'politik' ][ 'bundestag' ][ 'legislaturperioden' ] = $geb_mgl_arr[ 1 ];
}
}
//print_r( parse_url( $mdb_pl_url ) );
//print_r( pathinfo( parse_url( $mdb_pl_url, PHP_URL_PATH ) ) );
$mdb_pi_tmp = pathinfo( parse_url( $mdb_pl_url, PHP_URL_PATH ) );
$mdb_pl_path = $mdb_pi_tmp[ 'dirname' ];
$mdb_cntc_bn = '/kontakt';
$mdb_cntc_url = $baseurl . $mdb_pl_path . $mdb_cntc_bn;
//print_r( $mdb_cntc_url );
$mdb_cntc_src = file_get_contents( $mdb_cntc_url );
$mdb_cntc_dd = initialize_domdoc( $mdb_cntc_src );
$mdb_cntc_xp = new DOMXpath( $mdb_cntc_dd );
$mdb_cntc_adr_arr = array();
$mdb_cntc_els = $mdb_cntc_xp->query(
"//div[contains( @class, 'mdbProfil' )]
//div[contains( @class, 'kontakt' )]"
);
//print_r( $mdb_cntc_els );
foreach ( $mdb_cntc_els as $mdb_cntc_el ) {
//print_r( $mdb_cntc_el );
$mdb_cntc_cur_buero = array();
$kontakt_arr = $profil_url_daten_obj->kontakt;
$bd_str = '';
$bd_hnr = '';
$bd_plz = '';
$bd_ort = '';
$bd_mail = '';
$bd_tel = '';
$bd_fax = '';
$mp_wk_el_bn = $mdb_cntc_xp->query(
'.//h5',
$mdb_cntc_el
);
foreach ( $mp_wk_el_bn as $mp_bn ) {
//print_r($mp_bn);
$mp_bn_val = $mp_bn->nodeValue;
$mp_bn_val = preg_replace( '/,/', '', $mp_bn_val );
//print_r( preg_match( '/,/', $mp_bn_val ) );
//print_r( $mp_bn_val . PHP_EOL );
if ( preg_match( '/^Deutscher Bundestag/', $mp_bn_val ) ) {
$mdb_cntc_cur_buero[ 'Bezeichnung' ] = $mp_bn_val;
$kontakt_arr[ 'bezeichnung' ] = $mp_bn_val;
} elseif ( preg_match( '/^Wahlkreisbüro|Landesgruppe/', $mp_bn_val ) ) {
$mdb_cntc_cur_buero[ 'Bezeichnung' ] = 'Wahlkreisbüro';
$kontakt_arr[ 'bezeichnung' ] = 'Wahlkreisbüro';
} elseif ( preg_match( '/^Bürgerbüro|BürgerInnenbüro/', $mp_bn_val ) ) {
$mdb_cntc_cur_buero[ 'Bezeichnung' ] = 'Bürgerbüro';
$kontakt_arr[ 'bezeichnung' ] = 'Bürgerbüro';
} else {
print_r($mdb_nn.PHP_EOL);die;
}
}
$mp_wk_el_sa = $mdb_cntc_xp->query(
'.//span[contains( @itemprop, "street-address" )]',
$mdb_cntc_el
);
// Split the street-address text into street name and house number at the
// position in front of the first digit.
foreach ( $mp_wk_el_sa as $mp_sa ) {
    $mp_sa_val = $mp_sa->nodeValue;
    $str_hnr_arr = preg_split( '/(?=\d)/', $mp_sa_val, 2 );
    $bd_str = trim( $str_hnr_arr[ 0 ] );
    // An address without any digit yields a single element; fall back to an
    // empty house number instead of reading a missing index.
    $bd_hnr = isset( $str_hnr_arr[ 1 ] ) ? $str_hnr_arr[ 1 ] : '';
}
$mp_wk_el_pc = $mdb_cntc_xp->query(
'.//span[contains( @itemprop, "postal-code" )]',
$mdb_cntc_el
);
foreach ( $mp_wk_el_pc as $mp_pc ) {
//print_r($mp_pc);
$mp_pc_val = $mp_pc->nodeValue;
//print_r( $mp_pc_val . PHP_EOL );
$bd_plz = $mp_pc_val;
}
$mp_wk_el_ly = $mdb_cntc_xp->query(
'.//span[contains( @itemprop, "locality" )]',
$mdb_cntc_el
);
foreach ( $mp_wk_el_ly as $mp_ly ) {
//print_r($mp_ly);
$mp_ly_val = $mp_ly->nodeValue;
//print_r( $mp_ly_val . PHP_EOL );
$bd_ort = $mp_ly_val;
}
$mp_wk_el_tl = $mdb_cntc_xp->query(
'.//span[contains( @itemprop, "tel" )]',
$mdb_cntc_el
);
foreach ( $mp_wk_el_tl as $mp_tl ) {
//print_r($mp_tl);
$mp_tl_val = trim( $mp_tl->nodeValue );
$mp_tl_val = preg_replace( '/\(|\)|\-|\//', '', $mp_tl_val );
$mp_tl_val = preg_replace( '/^\+49/', '0', $mp_tl_val );
//print_r( $mp_tl_val . PHP_EOL );
$bd_tel = $mp_tl_val;
}
$mp_wk_el_fx = $mdb_cntc_xp->query(
'.//span[contains( @itemprop, "fax" )]',
$mdb_cntc_el
);
foreach ( $mp_wk_el_fx as $mp_fx ) {
//print_r($mp_fx);
$mp_fx_val = trim( $mp_fx->nodeValue );
$mp_fx_val = preg_replace( '/\(|\)|\-|\//', '', $mp_fx_val );
$mp_fx_val = preg_replace( '/^\+49/', '0', $mp_fx_val );
//print_r( $mp_fx_val . PHP_EOL );
$bd_fax = $mp_fx_val;
}
$mp_wk_el_el = $mdb_cntc_xp->query(
'.//a[contains( @class, "linkEmail" )]',
$mdb_cntc_el
);
foreach ( $mp_wk_el_el as $mp_el ) {
//print_r($mp_el);
$mp_el_val = $mp_el->nodeValue;
//print_r( $mp_el_val . PHP_EOL );
$bd_mail = $mp_el_val;
}
$mdb_cntc_cur_buero[ 'Adresse' ][ 'frag' ] = array (
'Straße' => $bd_str,
'Hausnummer' => $bd_hnr,
'Postleitzahl' => $bd_plz,
'Ort' => $bd_ort,
);
$mdb_cntc_cur_buero[ 'Adresse' ][ 'full' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $bd_str
. ' '
. $bd_hnr
. PHP_EOL
. $bd_plz
. ' '
. $bd_ort;
$mdb_cntc_cur_buero[ 'Kontakt' ][ 'Mail' ] = $bd_mail;
$mdb_cntc_cur_buero[ 'Kontakt' ][ 'Telefon' ] = $bd_tel;
$mdb_cntc_cur_buero[ 'Kontakt' ][ 'Fax' ] = $bd_fax;
$mdb_cntc_adr_arr[] = $mdb_cntc_cur_buero;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ] = $bd_str;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ] = $bd_hnr;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ] = $bd_plz;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ] = $bd_ort;
$kontakt_arr[ 'adresse' ][ 'komplett' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ]
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ];
$kontakt_arr[ 'eda' ][ 'mail' ] = $bd_mail;
$kontakt_arr[ 'eda' ][ 'telefon' ] = $bd_tel;
$kontakt_arr[ 'eda' ][ 'fax' ] = $bd_fax;
$mdb_data_arr[ 'kontakte' ][] = $kontakt_arr;
}
$mdb_data_arr[ 'url' ] = $mdb_pl_url;
$mdb_db[] = $mdb_data_arr;
/***/
$mdb_db_tmp[] = array(
'Name' => $name_arr,
'Kontaktdaten' => $mdb_cntc_adr_arr,
// 'Fraktion' => $ffkt_fl_arr,
// 'Wahl' => $wahl_arr,
'Links' => $linkliste_arr,
'Bio' => $biogra_arr,
'PURL' => $mdb_pl_url,
);
/***/
}
// print_r($mdb_db);die;
// print_r($mdb_db_tmp);die;
// Persist the collected records twice: as a PHP array literal (var_export)
// and as JSON next to it (<file>.json).
file_put_contents( $db_file_arr, var_export( $mdb_db, true ) );
// json_encode()'s second parameter is a flags bitmask, not a boolean (that is
// json_decode()'s $assoc). The former `true` was coerced to 1, which is
// JSON_HEX_TAG; the flag is spelled out here so the written bytes stay the same.
$mdb_db_json = json_encode( $mdb_db, JSON_HEX_TAG );
if ( $mdb_db_json === false ) {
    // Previously a failed encode silently produced an empty .json file.
    die( 'json_encode failed: ' . json_last_error_msg() . PHP_EOL );
}
file_put_contents( $db_file_arr . '.json', $mdb_db_json );
parser-spd.php
<?php
// $profil_url_daten_obj = new ProfilURLDaten();
// //print_r( $profil_url_daten_obj );
// $profil_url_daten_obj->pud_dump();
// die;
$mdb_db = array();
$mdb_db_tmp = array();
$db_file_arr = 'tmp/spd_mdb_db';
$baseurl = $data[ 'current' ][ 'baseurl' ];
$mdb_lst_pg_dd = $data[ 'current' ][ 'domdoc' ];
$mdb_lst_dn = $mdb_lst_pg_dd->getElementById( 'member_overview_list' );
$mdb_lst_dd = new DomDocument;
$mdb_lst_dd->appendChild(
$mdb_lst_dd->importNode(
$mdb_lst_dn,
true
)
);
$mdb_lst_xp = new DOMXpath( $mdb_lst_dd );
$mdb_lst_lis = $mdb_lst_xp->query( './li' );
foreach ( $mdb_lst_lis as $mdb_lst_li ) {
// print_r($value);
$profil_url_daten_obj = new ProfilURLDaten();
$mdb_data_arr = $profil_url_daten_obj->profil_url_daten;
$name_from_list = $mdb_lst_xp->query(
'.//div[contains(@class, "info_wrapper")]/h3/a',
$mdb_lst_li
);
// print_r($name_from_list);
foreach ( $name_from_list as $nfl ) {
// print_r( $nfl->getAttribute( 'href' ) );
$name_arr = array();
$nfl_val = $nfl->nodeValue;
$mdb_pl_href = $nfl->getAttribute( 'href' );
// Clean Up Whitespaces
$nfl_str = preg_replace( '/^\s+|\n|\r|\s+$/m', '', $nfl_val );
// Separate Title from Name
$nfl_ti_fn_nn_arr = explode( '.', $nfl_str );
$nfl_fn_nn_str = trim( array_pop( $nfl_ti_fn_nn_arr ) );
// Create Array with NN FN
$nfl_fn_nn_arr = explode( ' ', $nfl_fn_nn_str );
// Derive $mdb_vn (given name) and $mdb_nn (surname) from the space-separated
// name tokens.
//
// The original conditions used `=` instead of `==`, which overwrote the third
// token with 'Ridder' for every name that did not have exactly two tokens and
// made the second branch unreachable.
$nfl_part_count = count( $nfl_fn_nn_arr );
if ( $nfl_part_count == 2 ) {
    $mdb_vn = trim( $nfl_fn_nn_arr[ 0 ] );
    $mdb_nn = trim( $nfl_fn_nn_arr[ 1 ] );
} elseif ( $nfl_part_count > 2 && $nfl_fn_nn_arr[ 2 ] == 'Ridder' ) {
    // Two-word surname: tokens 1 and 2 together form the surname.
    $mdb_vn = trim( $nfl_fn_nn_arr[ 0 ] );
    $mdb_nn = trim( $nfl_fn_nn_arr[ 1 ] )
        . ' '
        . trim( $nfl_fn_nn_arr[ 2 ] );
} elseif ( $nfl_part_count > 2 ) {
    // Default for longer names (this covers the former 'Rossmann' case): the
    // last token is the surname, everything before it the given names.
    // Without a default, $mdb_vn / $mdb_nn would keep the values of the
    // previously processed entry.
    $mdb_nn = trim( $nfl_fn_nn_arr[ $nfl_part_count - 1 ] );
    $mdb_vn = trim( implode( ' ', array_slice( $nfl_fn_nn_arr, 0, -1 ) ) );
} else {
    // Single token: explode() always returns at least one element; treat it
    // as the surname.
    $mdb_vn = '';
    $mdb_nn = trim( $nfl_fn_nn_arr[ 0 ] );
}
$nfl_ti_arr = $nfl_ti_fn_nn_arr;
$mdb_ti = '';
if ( ! empty( $nfl_ti_arr ) ) {
foreach ( $nfl_ti_arr as $title_ele ) {
$mdb_ti .= $title_ele . '.';
}
}
$name_arr = array(
'Vorname' => $mdb_vn,
'Nachname' => $mdb_nn,
'Titel' => $mdb_ti,
);
$mdb_data_arr[ 'name' ][ 'nachname' ] = $mdb_nn;
$mdb_data_arr[ 'name' ][ 'vorname' ] = $mdb_vn;
$mdb_data_arr[ 'name' ][ 'adelszusatz' ] = '';
$mdb_data_arr[ 'name' ][ 'titel' ] = $mdb_ti;
}
$msll_mi_ul = $mdb_lst_xp->query(
'.//ul[contains(@class, "member_infos")]',
$mdb_lst_li
);
foreach ( $msll_mi_ul as $mmu ) {
//print_r($mu);
$wahl_arr = array();
$mmu_lis = $mdb_lst_xp->query( './li', $mmu );
foreach ( $mmu_lis as $muli ) {
//print_r($mul);
$wk_nr_arr = array();
$mi_li_val = trim( $muli->nodeValue );
//print_r( $mi_li_val . PHP_EOL );
$mi_li_first = strtok( $mi_li_val, ' ' );
//print_r( $mi_li_first . PHP_EOL );
// Classify one member-info line by its first word and copy the values found
// in square brackets into the temporary ($wahl_arr) and final ($mdb_data_arr)
// structures. Unknown first words abort the run.
if ( $mi_li_first == 'Direktmandat' || $mi_li_first == 'Landesliste' ) {
    // Both mandate types were handled by two identical branches.
    $wa_wi_typ = $mi_li_first;
    $wahl_arr[ 'Typ' ] = $wa_wi_typ;
    preg_match( '/(?<=\[).*?(?=\])/', $mi_li_val, $wk_nr_arr );
    $wa_wi_data = $wk_nr_arr;
    $mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'typ' ] = $wa_wi_typ;
    // A line without a bracketed value leaves $wk_nr_arr empty; store ''
    // instead of reading a missing index.
    $mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'wahlkreisnummer' ] =
        isset( $wk_nr_arr[ 0 ] ) ? $wk_nr_arr[ 0 ] : '';
} elseif ( $mi_li_first == 'Betreuter' || $mi_li_first == 'Betreute' ) {
    $wa_wi_typ = 'Betreut';
    // Collect every bracketed value as one flat list. The singular branch used
    // preg_match() (flat result) while the plural branch stored the nested
    // preg_match_all() result, so the same key held two different shapes.
    preg_match_all( '/(?<=\[).*?(?=\])/', $mi_li_val, $wk_nr_matches );
    $wk_nr_arr = $wk_nr_matches[ 0 ];
    $wa_wi_data = $wk_nr_arr;
    $mdb_data_arr[ 'politik' ][ 'fraktion' ][ 'betreutewahlkreise' ] = $wk_nr_arr;
} elseif ( $mi_li_first == 'Ab' || $mi_li_first == 'Bis' ) {
    // Space-separated special case: "<Ab|Bis> <date> [<reason>]".
    $wa_wi_typ = 'Sonderfall';
    $vb_arr = explode( ' ', $mi_li_val );
    $wa_sf_typ = $vb_arr[ 0 ];
    $wa_sf_datum = isset( $vb_arr[ 1 ] ) ? $vb_arr[ 1 ] : '';
    $wa_sf_grund = '';
    if ( count( $vb_arr ) == 3 ) {
        // Drop the first and last character of the third token
        // (presumably surrounding parentheses - confirm against the page).
        $wa_sf_grund = substr( $vb_arr[ 2 ], 1, -1 );
    }
    $wa_wi_data = array(
        'Typ' => $wa_sf_typ,
        'Datum' => $wa_sf_datum,
        'Grund' => $wa_sf_grund,
    );
    $mdb_data_arr[ 'politik' ][ 'wahl' ][ 'sonderfall' ][ 'typ' ] = $wa_sf_typ;
    $mdb_data_arr[ 'politik' ][ 'wahl' ][ 'sonderfall' ][ 'datum' ] = $wa_sf_datum;
    $mdb_data_arr[ 'politik' ][ 'wahl' ][ 'sonderfall' ][ 'grund' ] = $wa_sf_grund;
} else {
    die( 'nicht berücksichtigt' );
}
$wahl_arr[ $wa_wi_typ ] = $wa_wi_data;
}
}
$mdb_pl_url = $baseurl . $mdb_pl_href;
$src_file_name = basename( $mdb_pl_url );
$src_path = 'tmp/spd/src/abg/';
$src_file = $src_path . $src_file_name;
// Load the profile page from the local cache file, downloading it on a miss.
// An empty cache file counts as a miss: a failed download used to be written
// out as an empty file and was then reused on every later run.
$mdb_pl_src = is_file( $src_file ) ? file_get_contents( $src_file ) : false;
if ( $mdb_pl_src === false || $mdb_pl_src === '' ) {
    $mdb_pl_src = file_get_contents( $mdb_pl_url );
    if ( $mdb_pl_src === false || $mdb_pl_src === '' ) {
        // Nothing to parse: report it and move on to the next list entry
        // instead of caching and parsing an empty document.
        error_log( 'mdb-parser: could not fetch ' . $mdb_pl_url . ' - skipping profile' );
        continue;
    }
    file_put_contents(
        $src_file,
        $mdb_pl_src
    );
}
//$mdb_pl_src = file_get_contents( $mdb_pl_url );
$mdb_pl_dd = initialize_domdoc( $mdb_pl_src );
$mdb_pl_xp = new DOMXpath( $mdb_pl_dd );
$mdb_pl_cb = $mdb_pl_xp->query( '//*[contains( @class, "map_box_content" )]' );
/***/
foreach ( $mdb_pl_cb as $mp_cb ) {
//print_r($mpcb);
$mp_cb_lis = $mp_cb->getElementsByTagName( 'li' );
//$cb_header = $mpcb->getElementsByTagName( 'h3' );
//print_r($cb_header->item(0)->nodeValue);
$mp_cb_adr_arr = array();
foreach ( $mp_cb_lis as $mp_cb_li ) {
$mp_cb_li_adr_arr = array();
$mp_cb_li_adr_eff = array();
$mp_cb_li_kontakt = array();
$kontakt_arr = $profil_url_daten_obj->kontakt;
$buero_name = trim( get_element_by_tag_name_unique( $mp_cb_li, 'h3' ) );
$buero_mail = mailto_remover(
get_element_by_tag_name_unique(
$mp_cb_li,
'a',
'href'
)
);
$mp_cb_li_kontakt[ 'Mail' ] = $buero_mail;
$buero_strort = get_element_by_tag_name_item_nr( $mp_cb_li, 'span', 0 );
$buero_telfax = get_element_by_tag_name_item_nr( $mp_cb_li, 'span', 1 );
$buero_strort_arr = explode( '|', $buero_strort );
$buero_telfax_arr = explode( '|', $buero_telfax );
array_trim( $buero_strort_arr );
array_trim( $buero_telfax_arr );
// The "|"-separated contact span holds the phone part first and the fax part
// second; the fax part may be missing.
$buero_tel = isset( $buero_telfax_arr[ 0 ] ) ? $buero_telfax_arr[ 0 ] : '';
$buero_fax = isset( $buero_telfax_arr[ 1 ] ) ? $buero_telfax_arr[ 1 ] : '';
// Reduce both values to the bare number: strip letters and the punctuation
// , . : / - and collapse runs of whitespace to a single space.
// The former byte-mode class [a-zA-Z...\Ü] removed single bytes of multi-byte
// UTF-8 letters, leaving invalid UTF-8 behind; the following /u replace then
// returned null and the number was lost. Matching \p{L} in UTF-8 mode removes
// whole letters instead.
$cbuero_nrs = array();
foreach ( array( 'Telefon' => $buero_tel, 'Fax' => $buero_fax ) as $nr_typ => $nr_raw ) {
    $nr_clean = preg_replace( '/[\p{L},.:\/\-]/u', '', $nr_raw );
    $nr_clean = trim( preg_replace( '/\s+/u', ' ', (string) $nr_clean ) );
    $mp_cb_li_kontakt[ $nr_typ ] = $nr_clean;
    $cbuero_nrs[ $nr_typ ] = $nr_clean;
}
// Keep the variable names the code further down reads.
$cbuero_tel = $cbuero_nrs[ 'Telefon' ];
$cbuero_fax = $cbuero_nrs[ 'Fax' ];
$buero_strnr = $buero_strort_arr[ 0 ];
$buero_plzort = $buero_strort_arr[ 1 ];
//$strnr_arr = explode( ' ', $strnr );
$buero_strnr_arr = preg_split( '/(?=\d)/', $buero_strnr, 2 );
array_trim( $buero_strnr_arr );
//print_r( $strnr_arr );
if ( ! isset( $buero_strnr_arr[ 1 ] ) && $mdb_nn == 'Kiziltepe' ) {
//print_r( $vorname . ' ' . $nachname . PHP_EOL );
$buero_strnr_arr[ 1 ] = '4';
}
$buero_str = $buero_strnr_arr[ 0 ];
$buero_hnr = $buero_strnr_arr[ 1 ];
//$plzort_arr = explode( ' ', $plzort );
$buero_plzort_arr = preg_split( '/(?!\d)/', $buero_plzort, 2 );
array_trim( $buero_plzort_arr );
$buero_plz = $buero_plzort_arr[ 0 ];
$buero_ort = $buero_plzort_arr[ 1 ];
$mp_cb_li_adr_eff[ 'full' ] = $mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
//. $buero_name
//. PHP_EOL
. $buero_strnr
. PHP_EOL
. $buero_plzort;
$mp_cb_li_adr_eff[ 'frag' ] = array(
'Straße' => $buero_str,
'Hausnummer' => $buero_hnr,
'Postleitzahl' => $buero_plz,
'Ort' => $buero_ort,
);
$mp_cb_li_adr_arr = array(
'Bezeichnung' => $buero_name,
'Adresse' => $mp_cb_li_adr_eff,
'Kontakt' => $mp_cb_li_kontakt,
);
array_push( $mp_cb_adr_arr, $mp_cb_li_adr_arr );
$kontakt_arr[ 'bezeichnung' ] = $buero_name;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ] = $buero_str;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ] = $buero_hnr;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ] = $buero_plz;
$kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ] = $buero_ort;
$kontakt_arr[ 'adresse' ][ 'komplett' ] =
$mdb_vn
. ' '
. $mdb_nn
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'strasse' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'hausnummer' ]
. PHP_EOL
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'postleitzahl' ]
. ' '
. $kontakt_arr[ 'adresse' ][ 'fragmente' ][ 'ort' ];
$kontakt_arr[ 'eda' ][ 'mail' ] = $buero_mail;
$kontakt_arr[ 'eda' ][ 'telefon' ] = $cbuero_tel;
$kontakt_arr[ 'eda' ][ 'fax' ] = $cbuero_fax;
$mdb_data_arr[ 'kontakte' ][] = $kontakt_arr;
}
}
/***/
$mdb_pl_ad_dn = $mdb_pl_dd->getElementById( 'article_detail' );
$mdb_pl_ad_dd = new DomDocument;
$mdb_pl_ad_dd->appendChild(
$mdb_pl_ad_dd->importNode(
$mdb_pl_ad_dn,
true
)
);
//print_r( $mdb_pl_ad_dd );
$mdb_pl_ad_xp = new DOMXpath( $mdb_pl_ad_dd );
//print_r( $mdb_pl_ad_xp );
/***/
$mdb_pl_ad_xp_ll = $mdb_pl_ad_xp->query( '//ul[contains( @class, "linklist" )]' );
$linkliste_arr = array();
foreach ( $mdb_pl_ad_xp_ll as $mpax_ll ) {
//print_r( $mpax_ll );
$mpax_ll_lis = $mdb_pl_ad_xp->query(
'./li',
$mpax_ll
);
//print_r( $mpax_ll_lis );
foreach ( $mpax_ll_lis as $mpax_ll_li) {
//print_r( $mpax_ll_li );
$mpax_ll_li_val = $mpax_ll_li->nodeValue;
if ( empty( $mpax_ll_li_val ) ) {
continue;
}
//print_r( $mpax_ll_li->nodeValue . PHP_EOL );
// Find the href of the anchor directly inside this list item (the last one
// wins if there are several). The variable is reset for every item first:
// without the reset, an item without an anchor silently reused the href of
// the previously processed item (possibly from another member's page).
$mpax_ll_li_a_href = '';
$mpax_ll_li_a = $mdb_pl_ad_xp->query(
    './a',
    $mpax_ll_li
);
foreach ( $mpax_ll_li_a as $mlla ) {
    $mpax_ll_li_a_href = $mlla->getAttribute( 'href' );
}
// Items without a usable link target are skipped.
if ( empty( $mpax_ll_li_a_href ) ) {
    continue;
}
//print_r( $mlla->getAttribute( 'href' ) . PHP_EOL );
$link_arr = array(
'txt_val' => $mpax_ll_li_val,
'href' => $mpax_ll_li_a_href,
);
if ( preg_match( '/^homepage|website/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'Homepage' ] = $link_arr;
} elseif ( preg_match( '/ Blog$/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'Homepage' ] = $link_arr;
} elseif ( preg_match( '/^youtube/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Youtube' ] = $link_arr;
} elseif ( preg_match( '/^facebook/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Facebook' ] = $link_arr;
} elseif ( preg_match( '/^twitter/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Twitter' ] = $link_arr;
} elseif ( preg_match( '/^google+/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'Google+' ] = $link_arr;
} elseif ( preg_match( '/^flickr/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'flickr' ] = $link_arr;
} elseif ( preg_match( '/^xing/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'xing' ] = $link_arr;
} elseif ( preg_match( '/^meinvz/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'meinvz' ] = $link_arr;
} elseif ( preg_match( '/^studivz/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'studivz' ] = $link_arr;
} elseif ( preg_match( '/^friendfeed/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'SozialeNetzwerke' ][ 'friendfeed' ] = $link_arr;
} elseif ( preg_match( '/^abgeordnetenwatch/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'spezifisch' ][ 'abgeordnetenwatch' ] = $link_arr;
} elseif ( preg_match( '/^Reden im Videoarchiv des Bundestags/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'Bundestag' ][ 'Reden' ] = $link_arr;
} elseif ( preg_match( '/^Porträt auf bundestag\.de/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'Bundestag' ][ 'Profil' ] = $link_arr;
} elseif ( preg_match( '/^Landesgruppe/iu', $mpax_ll_li_val ) ) {
$linkliste_arr[ 'Sonstige' ][ 'bereichsspezifisch' ][ 'Landesgruppe' ] = $link_arr;
} else {
$linkliste_arr[ 'Sonstige' ][ 'Unsortiert' ][ $mpax_ll_li_val ] = $link_arr;
}
$verw_arr = $profil_url_daten_obj->verweisbasis;
$verw_arr[ 'text' ] = $mpax_ll_li_val;
$verw_arr[ 'href' ] = $mpax_ll_li_a_href;
if ( preg_match( '/^homepage|website/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'persoenlicheseite' ] = $verw_arr;
} elseif ( preg_match( '/ Blog$/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'persoenlicheseite' ] = $verw_arr;
} elseif ( preg_match( '/^youtube/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Youtube' ] = $verw_arr;
} elseif ( preg_match( '/^facebook/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Facebook' ] = $verw_arr;
} elseif ( preg_match( '/^twitter/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Twitter' ] = $verw_arr;
} elseif ( preg_match( '/^google+/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'Google+' ] = $verw_arr;
} elseif ( preg_match( '/^flickr/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'flickr' ] = $verw_arr;
} elseif ( preg_match( '/^xing/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'xing' ] = $verw_arr;
} elseif ( preg_match( '/^meinvz/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'meinvz' ] = $verw_arr;
} elseif ( preg_match( '/^studivz/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'studivz' ] = $verw_arr;
} elseif ( preg_match( '/^friendfeed/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sozialenetzwerke' ][ 'friendfeed' ] = $verw_arr;
} elseif ( preg_match( '/^abgeordnetenwatch/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'spezifisch' ][ 'abgeordnetenwatch' ] = $verw_arr;
} elseif ( preg_match( '/^Reden im Videoarchiv des Bundestags/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'bundestag' ][ 'reden' ] = $verw_arr;
} elseif ( preg_match( '/^Porträt auf bundestag\.de/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'bundestag' ][ 'profil' ] = $verw_arr;
} elseif ( preg_match( '/^Landesgruppe/iu', $mpax_ll_li_val ) ) {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'bereich' ][ 'Landesgruppe' ] = $verw_arr;
} else {
$mdb_data_arr[ 'verweise' ][ 'sonstige' ][ 'unsortiert' ][ $mpax_ll_li_val ] = $verw_arr;
}
}
}
/***/
/***/
$mdb_pl_ad_xp_bg = $mdb_pl_ad_xp->query( '//dl[contains( @class, "block" )]' );
$biogra_arr = array();
foreach ( $mdb_pl_ad_xp_bg as $mpax_bg ) {
//print_r($mpax_bg);
$mpax_bg_dtdds = $mdb_pl_ad_xp->query(
'./dt',
$mpax_bg
);
//print_r( $mpax_bg_dtdds );
foreach ( $mpax_bg_dtdds as $mpax_bg_dddt) {
//print_r( $mpax_bg_dddt );
//print_r( $mpax_bg_dddt->nextSibling->nextSibling );
$mdb_bio_tb_key = substr( $mpax_bg_dddt->nodeValue, 0, -1 );
$mdb_bio_tb_val = $mpax_bg_dddt->nextSibling->nextSibling->nodeValue;
$biogra_arr[ $mdb_bio_tb_key ] = $mdb_bio_tb_val;
// "Geburtsdatum" holds the birth date, optionally followed by "in <place>".
if ( $mdb_bio_tb_key == 'Geburtsdatum' ) {
    // Date: shortest substring starting with 1-2 digits and ending with
    // 4 digits. Only stored on a real match; reading index 0 after a failed
    // match was an undefined-index access.
    if ( preg_match( '/(\d{1,2}(.*?)(\d{4}))/', $mdb_bio_tb_val, $geb_dat_arr ) ) {
        $mdb_data_arr[ 'biografie' ][ 'geburtsdatum' ] = $geb_dat_arr[ 0 ];
    }
    // Place: everything after the first "in ". The old guard tested for 'in'
    // but split on 'in ', so a value containing "in" without a following
    // space read a missing index. The limit of 2 keeps place names that
    // themselves contain "in " in one piece.
    $geb_ort_arr = explode( 'in ', $mdb_bio_tb_val, 2 );
    if ( isset( $geb_ort_arr[ 1 ] ) ) {
        $mdb_data_arr[ 'biografie' ][ 'geburtsort' ] = $geb_ort_arr[ 1 ];
    }
}
if ( $mdb_bio_tb_key == 'Beruf' ) {
$beruf_arr = explode( ',', $mdb_bio_tb_val );
array_trim( $beruf_arr );
$mdb_data_arr[ 'biografie' ][ 'beruf' ] = $beruf_arr;
}
if ( $mdb_bio_tb_key == 'Legislaturperioden' ) {
$legper_arr = explode( '|', $mdb_bio_tb_val );
array_trim( $legper_arr );
$mdb_data_arr[ 'politik' ][ 'bundestag' ][ 'legislaturperioden' ] = $legper_arr;
}
if ( $mdb_bio_tb_key == 'Landesliste' ) {
$mdb_data_arr[ 'politik' ][ 'wahl' ][ 'mandat' ][ 'bundesland' ] = $mdb_bio_tb_val;
}
}
//print_r( $biogra_arr );
}
/***/
$mdb_data_arr[ 'url' ] = $mdb_pl_url;
$mdb_db[] = $mdb_data_arr;
/***/
$mdb_db_tmp[] = array(
'Name' => $name_arr,
'Kontaktdaten' => $mp_cb_adr_arr,
//'Fraktion' => $ffkt_fl_arr,
'Wahl' => $wahl_arr,
'Links' => $linkliste_arr,
'Bio' => $biogra_arr,
'PURL' => $mdb_pl_url,
);
/***/
}
// print_r($mdb_db);die;
// print_r($mdb_db_tmp);die;
// Persist the collected records twice: as a PHP array literal (var_export)
// and as JSON next to it (<file>.json).
file_put_contents( $db_file_arr, var_export( $mdb_db, true ) );
// json_encode()'s second parameter is a flags bitmask, not a boolean (that is
// json_decode()'s $assoc). The former `true` was coerced to 1, which is
// JSON_HEX_TAG; the flag is spelled out here so the written bytes stay the same.
$mdb_db_json = json_encode( $mdb_db, JSON_HEX_TAG );
if ( $mdb_db_json === false ) {
    // Previously a failed encode silently produced an empty .json file.
    die( 'json_encode failed: ' . json_last_error_msg() . PHP_EOL );
}
file_put_contents( $db_file_arr . '.json', $mdb_db_json );
variable-profil-url-daten.php
<?php
/**
* Profil URL Daten - Klasse und Struktur
* Definition der Standardstruktur der Daten der Fraktionsprofilseite der
* Mitglieder des Bundestags.
* Vermittels der Klasse ProfilURLDaten, die zugleich
* die Handhabung des Datencontainer-Arrays ermöglicht.
*
* @package mdb-parser
*/
/**
* Klasse ProfilURLDaten
*
* @author ng
*/
class ProfilURLDaten {
/**
* Profil URL Daten Container
*
* @var array
*/
public $profil_url_daten = array();
/**
* Namensbestandteile
*
* @var array
*/
public $name = array();
/**
* Nachname
* > Namensbestandteile
*
* @var string
*/
public $nachname = '';
/**
* Vorname
* > Namensbestandteile
*
* @var string
*/
public $vorname = '';
/**
* Adelszusatz
* > Namensbestandteile
*
* @var string
*/
public $adelszusatz = '';
/**
* Titel
* > Namensbestandteile
*
* @var string
*/
public $titel = '';
/**
* Kontaktdaten
*
* @var array
*/
public $kontakte = array();
/**
* Kontakt
* > Kontaktdaten
*
* @var array
*/
public $kontakt = array();
/**
* Bezeichnung der Kontaktmöglichkeit
* > Kontaktdaten > Kontakt
*
* @var string
*/
public $kontakt_bezeichnung = '';
/**
* Adressdaten des Kontakts
* > Kontaktdaten > Kontakt
*
* @var array
*/
public $kontakt_adresse = array();
/**
* Adressfragmente
* > Kontaktdaten > Kontakt > Adresse
*
* @var array
*/
public $kontakt_adresse_fragmente = array();
/**
* Kontaktadressenbestandteil: Straße
* > Kontaktdaten > Kontakt > Adresse > Fragmente
*
* @var string
*/
public $kontakt_adresse_strasse = '';
/**
* Kontaktadressenbestandteil: Hausnummer
* > Kontaktdaten > Kontakt > Adresse > Fragmente
*
* @var string
*/
public $kontakt_adresse_hausnummer = '';
/**
* Kontaktadressenbestandteil: Postleitzahl
* > Kontaktdaten > Kontakt > Adresse > Fragmente
*
* @var string
*/
public $kontakt_adresse_postleitzahl = '';
/**
* Kontaktadressenbestandteil: Ort
* > Kontaktdaten > Kontakt > Adresse > Fragmente
*
* @var string
*/
public $kontakt_adresse_ort = '';
/**
* Komplette Adresse
* > Kontaktdaten > Kontakt > Adresse
*
* @var string
*/
public $kontakt_adresse_komplett = '';
/**
* Elektronische Datenaustausch (EDA) Informationen des Kontakts
* > Kontaktdaten > Kontakt
*
* @var array
*/
public $kontakt_eda = array();
/**
* Mailadresse des Kontakts
* > Kontaktdaten > Kontakt > EDA
*
* @var string
*/
public $eda_mail = '';
/**
* Telefonnummer des Kontakts
* > Kontaktdaten > Kontakt > EDA
*
* @var string
*/
public $eda_telefon = '';
/**
* Faxnummer des Kontakts
* > Kontaktdaten > Kontakt > EDA
*
* @var string
*/
public $eda_fax = '';
/**
* Politik
*
* @var array
*/
public $politik = array();
/**
* Wahl
* > Politik
* Elemente: Mandat, Sonderfall
*
* @var array
*/
public $wahl = array();
/**
* Mandat
* > Politik > Wahl
*
* @var array
*/
public $mandat = array();
/**
* Mandatstyp
* > Politik > Wahl > Mandat
*
* @var string
*/
public $mandat_typ = '';
/**
* Wahlkreisnummer
* > Politik > Wahl > Mandat
*
* @var string
*/
public $mandat_wahlkreisnummer = '';
/**
* Bundesland - Mandat
* > Politik > Wahl > Mandat
*
* @var string
*/
public $mandat_bundesland = '';
/**
* Sonderfall
* > Politik > Wahl
*
* @var array
*/
public $sonderfall = array();
/**
* Typ - Sonderfall
* > Politik > Wahl > Sonderfall
*
* @var string
*/
public $sonderfall_typ = '';
/**
* Datum - Sonderfall
* > Politik > Wahl > Sonderfall
*
* @var string
*/
public $sonderfall_datum = '';
/**
* Bedingung - Sonderfall
* > Politik > Wahl > Sonderfall
*
* @var string
*/
public $sonderfall_bedingung = '';
/**
* Fraktion
* > Politik
* Elemente: Funktionen, Arbeitskreise, betreute Wahlkreise
*
* @var array
*/
public $fraktion = array();
/**
* Funktionen in Fraktion
* > Politik > Fraktion
*
* @var array
*/
public $fraktion_funktionen = array();
/**
* Arbeitskreise in Fraktion
* > Politik > Fraktion
*
* @var array
*/
public $fraktion_arbeitskreise = array();
/**
* Betreute Wahlkreise
* > Politik > Fraktion
*
* @var array
*/
public $betreutewahlkreise = array();
/**
* Bundestag
* > Politik
*
* @var array
*/
public $bundestag = array();
/**
* Funktionen im Bundestag
* > Politik > Bundestag
*
* @var array
*/
public $bundestag_funktionen = array();
/**
* Legislaturperioden
* > Politik > Bundestag
*
* @var array
*/
public $legislaturperioden = array();
/**
* Verweise
* Elemente: persönliche Seite, Bundestag, soziale Netzwerke, Sonstige
*
* @var array
*/
public $verweise = array();
/**
* Verweisbasis
*
* @var array
*/
public $verweisbasis = array( 'text' => '', 'href' => '' );
/**
* Persoenliche Seite
* > Verweise
*
* @var array
*/
public $persoenlicheseite = array();
/**
* Verweise zu Bundestag
* > Verweise
*
* @var array
*/
public $verweise_bundestag = array();
/**
* Verweise zu Bundestagsprofil
* > Verweise > Bundestag
*
* @var array
*/
public $verweise_bundestag_profil = array();
/**
* Verweise zu Bundestagsreden
* > Verweise > Bundestag
*
* @var array
*/
public $verweise_bundestag_reden = array();
/**
* Verweise zu sozialen Netzwerken
* > Verweise
*
* @var array
*/
public $verweise_sozialenetzwerke = array();
/**
* Sonstige Verweise
* > Verweise
*
* @var array
*/
public $verweise_sonstige = array();
/**
* Spezielle sonstige Verweise
* > Verweise > Sonstige
*
* @var array
*/
public $verweise_sonstige_speziell = array();
/**
* Bereichsspezifische sonstige Verweise
* > Verweise > Sonstige
*
* @var array
*/
public $verweise_sonstige_bereich = array();
/**
* Unsortierte sonstige Verweise
* > Verweise > Sonstige
*
* @var array
*/
public $verweise_sonstige_unsortiert = array();
/**
* Biografie
* Elemente: Geburtsdatum, Geburtsort, Beruf(e)
*
* @var array
*/
public $biografie = array();
/**
* Geburtsdatum
* > Biografie
*
* @var string
*/
public $biografie_geburtsdatum = '';
/**
* Geburtsort
* > Biografie
*
* @var string
*/
public $biografie_geburtsort = '';
/**
* Beruf(e)
* > Biografie
*
* @var array
*/
public $biografie_beruf = array();
/**
* Profil URL
*
* @var string
*/
public $url = '';
/**
* Profilbild
*
* @var string
*/
public $profilbild = '';
function __construct() {
	// Each branch of the profile template is assembled bottom-up in a local
	// array and then attached to its parent; the order in which keys are
	// added matches the order of the individual assignments it replaces.

	// name: container plus its four parts.
	$name                = $this->name;
	$name['nachname']    = $this->nachname;
	$name['vorname']     = $this->vorname;
	$name['adelszusatz'] = $this->adelszusatz;
	$name['titel']       = $this->titel;

	// politik > wahl > mandat
	$mandat                    = $this->mandat;
	$mandat['typ']             = $this->mandat_typ;
	$mandat['wahlkreisnummer'] = $this->mandat_wahlkreisnummer;
	$mandat['bundesland']      = $this->mandat_bundesland;

	// politik > wahl > sonderfall
	$sonderfall              = $this->sonderfall;
	$sonderfall['typ']       = $this->sonderfall_typ;
	$sonderfall['datum']     = $this->sonderfall_datum;
	$sonderfall['bedingung'] = $this->sonderfall_bedingung;

	// politik > wahl
	$wahl               = $this->wahl;
	$wahl['mandat']     = $mandat;
	$wahl['sonderfall'] = $sonderfall;

	// politik > fraktion
	$fraktion                       = $this->fraktion;
	$fraktion['funktionen']         = $this->fraktion_funktionen;
	$fraktion['arbeitskreise']      = $this->fraktion_arbeitskreise;
	$fraktion['betreutewahlkreise'] = $this->betreutewahlkreise;

	// politik > bundestag
	$bundestag                       = $this->bundestag;
	$bundestag['funktionen']         = $this->bundestag_funktionen;
	$bundestag['legislaturperioden'] = $this->legislaturperioden;

	// politik
	$politik              = $this->politik;
	$politik['wahl']      = $wahl;
	$politik['fraktion']  = $fraktion;
	$politik['bundestag'] = $bundestag;

	// verweise > bundestag
	$links_bundestag           = $this->verweise_bundestag;
	$links_bundestag['profil'] = $this->verweise_bundestag_profil;
	$links_bundestag['reden']  = $this->verweise_bundestag_reden;

	// verweise > sonstige
	$links_sonstige               = $this->verweise_sonstige;
	$links_sonstige['speziell']   = $this->verweise_sonstige_speziell;
	$links_sonstige['bereich']    = $this->verweise_sonstige_bereich;
	$links_sonstige['unsortiert'] = $this->verweise_sonstige_unsortiert;

	// verweise
	$verweise                      = $this->verweise;
	$verweise['persoenlicheseite'] = $this->persoenlicheseite;
	$verweise['bundestag']         = $links_bundestag;
	$verweise['sozialenetzwerke']  = $this->verweise_sozialenetzwerke;
	$verweise['sonstige']          = $links_sonstige;

	// biografie
	$biografie                 = $this->biografie;
	$biografie['geburtsdatum'] = $this->biografie_geburtsdatum;
	$biografie['geburtsort']   = $this->biografie_geburtsort;
	$biografie['beruf']        = $this->biografie_beruf;

	// Attach the finished branches to the top-level template.
	$this->profil_url_daten['name']       = $name;
	$this->profil_url_daten['kontakte']   = $this->kontakte;
	$this->profil_url_daten['politik']    = $politik;
	$this->profil_url_daten['verweise']   = $verweise;
	$this->profil_url_daten['biografie']  = $biografie;
	$this->profil_url_daten['url']        = $this->url;
	$this->profil_url_daten['profilbild'] = $this->profilbild;

	// kontakt > adresse > fragmente
	$fragmente                 = $this->kontakt_adresse_fragmente;
	$fragmente['strasse']      = $this->kontakt_adresse_strasse;
	$fragmente['hausnummer']   = $this->kontakt_adresse_hausnummer;
	$fragmente['postleitzahl'] = $this->kontakt_adresse_postleitzahl;
	$fragmente['ort']          = $this->kontakt_adresse_ort;

	// kontakt > adresse
	$adresse              = $this->kontakt_adresse;
	$adresse['fragmente'] = $fragmente;
	$adresse['komplett']  = $this->kontakt_adresse_komplett;

	// kontakt > eda
	$eda            = $this->kontakt_eda;
	$eda['mail']    = $this->eda_mail;
	$eda['telefon'] = $this->eda_telefon;
	$eda['fax']     = $this->eda_fax;

	// Single-contact template, kept on the object itself.
	$this->kontakt['bezeichnung'] = $this->kontakt_bezeichnung;
	$this->kontakt['adresse']     = $adresse;
	$this->kontakt['eda']         = $eda;

	// The contact template is not appended to the 'kontakte' list here;
	// the statement that would do so is disabled:
	//$this->profil_url_daten['kontakte'][] = $this->kontakt;
}
public function pud_dump( $pre = FALSE ) {
	// Plain dump: print_r() output only, no markup around it.
	if ( ! $pre ) {
		print_r( $this->profil_url_daten );
		return;
	}

	// Wrapped dump: same print_r() output enclosed in <pre>...</pre>.
	echo '<pre>';
	print_r( $this->profil_url_daten );
	echo '</pre>';
}
/**
*
* @return array
*/
public function get_profil_url_daten() {
	// Read accessor: hands back $this->profil_url_daten unchanged.
	return $this->profil_url_daten;
}
/**
*
* @param array $profil_url_daten
*/
public function set_profil_url_daten( array $profil_url_daten ) {
	// Replace $this->profil_url_daten (arrays only), then return $this for chaining.
	$this->profil_url_daten = $profil_url_daten;

	return $this;
}
/**
*
* @return array
*/
public function get_name() {
	// Read accessor: hands back $this->name unchanged.
	return $this->name;
}
/**
*
* @param array $name
*/
public function set_name( array $name ) {
	// Replace $this->name (arrays only), then return $this for chaining.
	$this->name = $name;

	return $this;
}
/**
*
* @return string
*/
public function get_nachname() {
	// Read accessor: hands back $this->nachname unchanged.
	return $this->nachname;
}
/**
*
* @param string $nachname
*/
public function set_nachname( $nachname ) {
	// Replace $this->nachname, then return $this for chaining.
	$this->nachname = $nachname;

	return $this;
}
/**
*
* @return string
*/
public function get_vorname() {
	// Read accessor: hands back $this->vorname unchanged.
	return $this->vorname;
}
/**
*
* @param string $vorname
*/
public function set_vorname( $vorname ) {
	// Replace $this->vorname, then return $this for chaining.
	$this->vorname = $vorname;

	return $this;
}
/**
*
* @return string
*/
public function get_adelszusatz() {
	// Read accessor: hands back $this->adelszusatz unchanged.
	return $this->adelszusatz;
}
/**
*
* @param string $adelszusatz
*/
public function set_adelszusatz( $adelszusatz ) {
	// Replace $this->adelszusatz, then return $this for chaining.
	$this->adelszusatz = $adelszusatz;

	return $this;
}
/**
*
* @return string
*/
public function get_titel() {
	// Read accessor: hands back $this->titel unchanged.
	return $this->titel;
}
/**
*
* @param string $titel
*/
public function set_titel( $titel ) {
	// Replace $this->titel, then return $this for chaining.
	$this->titel = $titel;

	return $this;
}
/**
*
* @return array
*/
public function get_kontakte() {
	// Read accessor: hands back $this->kontakte unchanged.
	return $this->kontakte;
}
/**
*
* @param array $kontakte
*/
public function set_kontakte( array $kontakte ) {
	// Replace $this->kontakte (arrays only), then return $this for chaining.
	$this->kontakte = $kontakte;

	return $this;
}
/**
*
* @return array
*/
public function get_kontakt() {
	// Read accessor: hands back $this->kontakt unchanged.
	return $this->kontakt;
}
/**
*
* @param array $kontakt
*/
public function set_kontakt( array $kontakt ) {
	// Replace $this->kontakt (arrays only), then return $this for chaining.
	$this->kontakt = $kontakt;

	return $this;
}
/**
*
* @return string
*/
public function get_kontakt_bezeichnung() {
	// Read accessor: hands back $this->kontakt_bezeichnung unchanged.
	return $this->kontakt_bezeichnung;
}
/**
*
* @param string $kontakt_bezeichnung
*/
public function set_kontakt_bezeichnung( $kontakt_bezeichnung ) {
	// Replace $this->kontakt_bezeichnung, then return $this for chaining.
	$this->kontakt_bezeichnung = $kontakt_bezeichnung;

	return $this;
}
/**
*
* @return array
*/
public function get_kontakt_adresse() {
	// Read accessor: hands back $this->kontakt_adresse unchanged.
	return $this->kontakt_adresse;
}
/**
*
* @param array $kontakt_adresse
*/
public function set_kontakt_adresse( array $kontakt_adresse ) {
	// Replace $this->kontakt_adresse (arrays only), then return $this for chaining.
	$this->kontakt_adresse = $kontakt_adresse;

	return $this;
}
/**
*
* @return array
*/
public function get_kontakt_adresse_fragmente() {
	// Read accessor: hands back $this->kontakt_adresse_fragmente unchanged.
	return $this->kontakt_adresse_fragmente;
}
/**
*
* @param array $kontakt_adresse_fragmente
*/
public function set_kontakt_adresse_fragmente( array $kontakt_adresse_fragmente ) {
	// Replace $this->kontakt_adresse_fragmente (arrays only), then return $this for chaining.
	$this->kontakt_adresse_fragmente = $kontakt_adresse_fragmente;

	return $this;
}
/**
*
* @return string
*/
public function get_kontakt_adresse_strasse() {
	// Read accessor: hands back $this->kontakt_adresse_strasse unchanged.
	return $this->kontakt_adresse_strasse;
}
/**
*
* @param string $kontakt_adresse_strasse
*/
public function set_kontakt_adresse_strasse( $kontakt_adresse_strasse ) {
	// Replace $this->kontakt_adresse_strasse, then return $this for chaining.
	$this->kontakt_adresse_strasse = $kontakt_adresse_strasse;

	return $this;
}
/**
*
* @return string
*/
public function get_kontakt_adresse_hausnummer() {
	// Read accessor: hands back $this->kontakt_adresse_hausnummer unchanged.
	return $this->kontakt_adresse_hausnummer;
}
/**
*
* @param string $kontakt_adresse_hausnummer
*/
public function set_kontakt_adresse_hausnummer( $kontakt_adresse_hausnummer ) {
	// Replace $this->kontakt_adresse_hausnummer, then return $this for chaining.
	$this->kontakt_adresse_hausnummer = $kontakt_adresse_hausnummer;

	return $this;
}
/**
*
* @return string
*/
public function get_kontakt_adresse_postleitzahl() {
	// Read accessor: hands back $this->kontakt_adresse_postleitzahl unchanged.
	return $this->kontakt_adresse_postleitzahl;
}
/**
*
* @param string $kontakt_adresse_postleitzahl
*/
public function set_kontakt_adresse_postleitzahl( $kontakt_adresse_postleitzahl ) {
	// Replace $this->kontakt_adresse_postleitzahl, then return $this for chaining.
	$this->kontakt_adresse_postleitzahl = $kontakt_adresse_postleitzahl;

	return $this;
}
/**
*
* @return string
*/
public function get_kontakt_adresse_ort() {
	// Read accessor: hands back $this->kontakt_adresse_ort unchanged.
	return $this->kontakt_adresse_ort;
}
/**
*
* @param string $kontakt_adresse_ort
*/
public function set_kontakt_adresse_ort( $kontakt_adresse_ort ) {
	// Replace $this->kontakt_adresse_ort, then return $this for chaining.
	$this->kontakt_adresse_ort = $kontakt_adresse_ort;

	return $this;
}
/**
*
* @return string
*/
public function get_kontakt_adresse_komplett() {
	// Read accessor: hands back $this->kontakt_adresse_komplett unchanged.
	return $this->kontakt_adresse_komplett;
}
/**
*
* @param string $kontakt_adresse_komplett
*/
public function set_kontakt_adresse_komplett( $kontakt_adresse_komplett ) {
	// Replace $this->kontakt_adresse_komplett, then return $this for chaining.
	$this->kontakt_adresse_komplett = $kontakt_adresse_komplett;

	return $this;
}
/**
*
* @return array
*/
public function get_kontakt_eda() {
	// Read accessor: hands back $this->kontakt_eda unchanged.
	return $this->kontakt_eda;
}
/**
*
* @param array $kontakt_eda
*/
public function set_kontakt_eda( array $kontakt_eda ) {
	// Replace $this->kontakt_eda (arrays only), then return $this for chaining.
	$this->kontakt_eda = $kontakt_eda;

	return $this;
}
/**
*
* @return string
*/
public function get_eda_mail() {
	// Read accessor: hands back $this->eda_mail unchanged.
	return $this->eda_mail;
}
/**
*
* @param string $eda_mail
*/
public function set_eda_mail( $eda_mail ) {
	// Replace $this->eda_mail, then return $this for chaining.
	$this->eda_mail = $eda_mail;

	return $this;
}
/**
*
* @return string
*/
public function get_eda_telefon() {
	// Read accessor: hands back $this->eda_telefon unchanged.
	return $this->eda_telefon;
}
/**
*
* @param string $eda_telefon
*/
public function set_eda_telefon( $eda_telefon ) {
	// Replace $this->eda_telefon, then return $this for chaining.
	$this->eda_telefon = $eda_telefon;

	return $this;
}
/**
*
* @return string
*/
public function get_eda_fax() {
	// Read accessor: hands back $this->eda_fax unchanged.
	return $this->eda_fax;
}
/**
*
* @param string $eda_fax
*/
public function set_eda_fax( $eda_fax ) {
	// Replace $this->eda_fax, then return $this for chaining.
	$this->eda_fax = $eda_fax;

	return $this;
}
/**
*
* @return array
*/
public function get_politik() {
	// Read accessor: hands back $this->politik unchanged.
	return $this->politik;
}
/**
*
* @param array $politik
*/
public function set_politik( array $politik ) {
	// Replace $this->politik (arrays only), then return $this for chaining.
	$this->politik = $politik;

	return $this;
}
/**
*
* @return array
*/
public function get_wahl() {
	// Read accessor: hands back $this->wahl unchanged.
	return $this->wahl;
}
/**
*
* @param array $wahl
*/
public function set_wahl( array $wahl ) {
	// Replace $this->wahl (arrays only), then return $this for chaining.
	$this->wahl = $wahl;

	return $this;
}
/**
*
* @return array
*/
public function get_mandat() {
	// Read accessor: hands back $this->mandat unchanged.
	return $this->mandat;
}
/**
*
* @param array $mandat
*/
public function set_mandat( array $mandat ) {
	// Replace $this->mandat (arrays only), then return $this for chaining.
	$this->mandat = $mandat;

	return $this;
}
/**
*
* @return string
*/
public function get_mandat_typ() {
	// Read accessor: hands back $this->mandat_typ unchanged.
	return $this->mandat_typ;
}
/**
*
* @param string $mandat_typ
*/
public function set_mandat_typ( $mandat_typ ) {
	// Replace $this->mandat_typ, then return $this for chaining.
	$this->mandat_typ = $mandat_typ;

	return $this;
}
/**
*
* @return string
*/
public function get_mandat_wahlkreisnummer() {
	// Read accessor: hands back $this->mandat_wahlkreisnummer unchanged.
	return $this->mandat_wahlkreisnummer;
}
/**
*
* @param string $mandat_wahlkreisnummer
*/
public function set_mandat_wahlkreisnummer( $mandat_wahlkreisnummer ) {
	// Replace $this->mandat_wahlkreisnummer, then return $this for chaining.
	$this->mandat_wahlkreisnummer = $mandat_wahlkreisnummer;

	return $this;
}
/**
*
* @return string
*/
public function get_mandat_bundesland() {
	// Read accessor: hands back $this->mandat_bundesland unchanged.
	return $this->mandat_bundesland;
}
/**
*
* @param string $mandat_bundesland
*/
public function set_mandat_bundesland( $mandat_bundesland ) {
	// Replace $this->mandat_bundesland, then return $this for chaining.
	$this->mandat_bundesland = $mandat_bundesland;

	return $this;
}
/**
*
* @return array
*/
public function get_sonderfall() {
	// Read accessor: hands back $this->sonderfall unchanged.
	return $this->sonderfall;
}
/**
*
* @param array $sonderfall
*/
public function set_sonderfall( array $sonderfall ) {
	// Replace $this->sonderfall (arrays only), then return $this for chaining.
	$this->sonderfall = $sonderfall;

	return $this;
}
/**
*
* @return string
*/
public function get_sonderfall_typ() {
	// Read accessor: hands back $this->sonderfall_typ unchanged.
	return $this->sonderfall_typ;
}
/**
*
* @param string $sonderfall_typ
*/
public function set_sonderfall_typ( $sonderfall_typ ) {
	// Replace $this->sonderfall_typ, then return $this for chaining.
	$this->sonderfall_typ = $sonderfall_typ;

	return $this;
}
/**
*
* @return string
*/
public function get_sonderfall_datum() {
	// Read accessor: hands back $this->sonderfall_datum unchanged.
	return $this->sonderfall_datum;
}
/**
*
* @param string $sonderfall_datum
*/
public function set_sonderfall_datum( $sonderfall_datum ) {
	// Replace $this->sonderfall_datum, then return $this for chaining.
	$this->sonderfall_datum = $sonderfall_datum;

	return $this;
}
/**
*
* @return string
*/
public function get_sonderfall_bedingung() {
	// Read accessor: hands back $this->sonderfall_bedingung unchanged.
	return $this->sonderfall_bedingung;
}
/**
*
* @param string $sonderfall_bedingung
*/
public function set_sonderfall_bedingung( $sonderfall_bedingung ) {
	// Replace $this->sonderfall_bedingung, then return $this for chaining.
	$this->sonderfall_bedingung = $sonderfall_bedingung;

	return $this;
}
/**
*
* @return array
*/
public function get_fraktion() {
	// Read accessor: hands back $this->fraktion unchanged.
	return $this->fraktion;
}
/**
*
* @param array $fraktion
*/
public function set_fraktion( array $fraktion ) {
	// Replace $this->fraktion (arrays only), then return $this for chaining.
	$this->fraktion = $fraktion;

	return $this;
}
/**
*
* @return array
*/
public function get_fraktion_funktionen() {
	// Read accessor: hands back $this->fraktion_funktionen unchanged.
	return $this->fraktion_funktionen;
}
/**
*
* @param array $fraktion_funktionen
*/
public function set_fraktion_funktionen( array $fraktion_funktionen ) {
	// Replace $this->fraktion_funktionen (arrays only), then return $this for chaining.
	$this->fraktion_funktionen = $fraktion_funktionen;

	return $this;
}
/**
*
* @return array
*/
public function get_fraktion_arbeitskreise() {
	// Read accessor: hands back $this->fraktion_arbeitskreise unchanged.
	return $this->fraktion_arbeitskreise;
}
/**
*
* @param array $fraktion_arbeitskreise
*/
public function set_fraktion_arbeitskreise( array $fraktion_arbeitskreise ) {
	// Replace $this->fraktion_arbeitskreise (arrays only), then return $this for chaining.
	$this->fraktion_arbeitskreise = $fraktion_arbeitskreise;

	return $this;
}
/**
*
* @return array
*/
public function get_betreutewahlkreise() {
	// Read accessor: hands back $this->betreutewahlkreise unchanged.
	return $this->betreutewahlkreise;
}
/**
*
* @param array $betreutewahlkreise
*/
public function set_betreutewahlkreise( array $betreutewahlkreise ) {
	// Replace $this->betreutewahlkreise (arrays only), then return $this for chaining.
	$this->betreutewahlkreise = $betreutewahlkreise;

	return $this;
}
/**
*
* @return array
*/
public function get_bundestag() {
	// Read accessor: hands back $this->bundestag unchanged.
	return $this->bundestag;
}
/**
*
* @param array $bundestag
*/
public function set_bundestag( array $bundestag ) {
	// Replace $this->bundestag (arrays only), then return $this for chaining.
	$this->bundestag = $bundestag;

	return $this;
}
/**
*
* @return array
*/
public function get_bundestag_funktionen() {
	// Read accessor: hands back $this->bundestag_funktionen unchanged.
	return $this->bundestag_funktionen;
}
/**
*
* @param array $bundestag_funktionen
*/
public function set_bundestag_funktionen( array $bundestag_funktionen ) {
	// Replace $this->bundestag_funktionen (arrays only), then return $this for chaining.
	$this->bundestag_funktionen = $bundestag_funktionen;

	return $this;
}
/**
*
* @return array
*/
public function get_legislaturperioden() {
	// Read accessor: hands back $this->legislaturperioden unchanged.
	return $this->legislaturperioden;
}
/**
*
* @param array $legislaturperioden
*/
public function set_legislaturperioden( array $legislaturperioden ) {
	// Replace $this->legislaturperioden (arrays only), then return $this for chaining.
	$this->legislaturperioden = $legislaturperioden;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise() {
	// Read accessor: hands back $this->verweise unchanged.
	return $this->verweise;
}
/**
*
* @param array $verweise
*/
public function set_verweise( array $verweise ) {
	// Replace $this->verweise (arrays only), then return $this for chaining.
	$this->verweise = $verweise;

	return $this;
}
/**
*
* @return array
*/
public function get_verweisbasis() {
	// Read accessor: hands back $this->verweisbasis unchanged.
	return $this->verweisbasis;
}
/**
*
* @param array $verweisbasis
*/
public function set_verweisbasis( array $verweisbasis ) {
	// Replace $this->verweisbasis (arrays only), then return $this for chaining.
	$this->verweisbasis = $verweisbasis;

	return $this;
}
/**
*
* @return array
*/
public function get_persoenlicheseite() {
	// Read accessor: hands back $this->persoenlicheseite unchanged.
	return $this->persoenlicheseite;
}
/**
*
* @param array $persoenlicheseite
*/
public function set_persoenlicheseite( array $persoenlicheseite ) {
	// Replace $this->persoenlicheseite (arrays only), then return $this for chaining.
	$this->persoenlicheseite = $persoenlicheseite;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_bundestag() {
	// Read accessor: hands back $this->verweise_bundestag unchanged.
	return $this->verweise_bundestag;
}
/**
*
* @param array $verweise_bundestag
*/
public function set_verweise_bundestag( array $verweise_bundestag ) {
	// Replace $this->verweise_bundestag (arrays only), then return $this for chaining.
	$this->verweise_bundestag = $verweise_bundestag;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_bundestag_profil() {
	// Read accessor: hands back $this->verweise_bundestag_profil unchanged.
	return $this->verweise_bundestag_profil;
}
/**
*
* @param array $verweise_bundestag_profil
*/
public function set_verweise_bundestag_profil( array $verweise_bundestag_profil ) {
	// Replace $this->verweise_bundestag_profil (arrays only), then return $this for chaining.
	$this->verweise_bundestag_profil = $verweise_bundestag_profil;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_bundestag_reden() {
	// Read accessor: hands back $this->verweise_bundestag_reden unchanged.
	return $this->verweise_bundestag_reden;
}
/**
*
* @param array $verweise_bundestag_reden
*/
public function set_verweise_bundestag_reden( array $verweise_bundestag_reden ) {
	// Replace $this->verweise_bundestag_reden (arrays only), then return $this for chaining.
	$this->verweise_bundestag_reden = $verweise_bundestag_reden;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_sozialenetzwerke() {
	// Read accessor: hands back $this->verweise_sozialenetzwerke unchanged.
	return $this->verweise_sozialenetzwerke;
}
/**
*
* @param array $verweise_sozialenetzwerke
*/
public function set_verweise_sozialenetzwerke( array $verweise_sozialenetzwerke ) {
	// Replace $this->verweise_sozialenetzwerke (arrays only), then return $this for chaining.
	$this->verweise_sozialenetzwerke = $verweise_sozialenetzwerke;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_sonstige() {
	// Read accessor: hands back $this->verweise_sonstige unchanged.
	return $this->verweise_sonstige;
}
/**
*
* @param array $verweise_sonstige
*/
public function set_verweise_sonstige( array $verweise_sonstige ) {
	// Replace $this->verweise_sonstige (arrays only), then return $this for chaining.
	$this->verweise_sonstige = $verweise_sonstige;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_sonstige_speziell() {
	// Read accessor: hands back $this->verweise_sonstige_speziell unchanged.
	return $this->verweise_sonstige_speziell;
}
/**
*
* @param array $verweise_sonstige_speziell
*/
public function set_verweise_sonstige_speziell( array $verweise_sonstige_speziell ) {
	// Replace $this->verweise_sonstige_speziell (arrays only), then return $this for chaining.
	$this->verweise_sonstige_speziell = $verweise_sonstige_speziell;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_sonstige_bereich() {
	// Read accessor: hands back $this->verweise_sonstige_bereich unchanged.
	return $this->verweise_sonstige_bereich;
}
/**
*
* @param array $verweise_sonstige_bereich
*/
public function set_verweise_sonstige_bereich( array $verweise_sonstige_bereich ) {
	// Replace $this->verweise_sonstige_bereich (arrays only), then return $this for chaining.
	$this->verweise_sonstige_bereich = $verweise_sonstige_bereich;

	return $this;
}
/**
*
* @return array
*/
public function get_verweise_sonstige_unsortiert() {
	// Read accessor: hands back $this->verweise_sonstige_unsortiert unchanged.
	return $this->verweise_sonstige_unsortiert;
}
/**
*
* @param array $verweise_sonstige_unsortiert
*/
public function set_verweise_sonstige_unsortiert( array $verweise_sonstige_unsortiert ) {
	// Replace $this->verweise_sonstige_unsortiert (arrays only), then return $this for chaining.
	$this->verweise_sonstige_unsortiert = $verweise_sonstige_unsortiert;

	return $this;
}
/**
*
* @return array
*/
public function get_biografie() {
	// Read accessor: hands back $this->biografie unchanged.
	return $this->biografie;
}
/**
*
* @param array $biografie
*/
public function set_biografie( array $biografie ) {
	// Replace $this->biografie (arrays only), then return $this for chaining.
	$this->biografie = $biografie;

	return $this;
}
/**
*
* @return string
*/
public function get_biografie_geburtsdatum() {
	// Read accessor: hands back $this->biografie_geburtsdatum unchanged.
	return $this->biografie_geburtsdatum;
}
/**
*
* @param string $biografie_geburtsdatum
*/
public function set_biografie_geburtsdatum( $biografie_geburtsdatum ) {
	// Replace $this->biografie_geburtsdatum, then return $this for chaining.
	$this->biografie_geburtsdatum = $biografie_geburtsdatum;

	return $this;
}
/**
*
* @return string
*/
public function get_biografie_geburtsort() {
	// Read accessor: hands back $this->biografie_geburtsort unchanged.
	return $this->biografie_geburtsort;
}
/**
*
* @param string $biografie_geburtsort
*/
public function set_biografie_geburtsort( $biografie_geburtsort ) {
	// Replace $this->biografie_geburtsort, then return $this for chaining.
	$this->biografie_geburtsort = $biografie_geburtsort;

	return $this;
}
/**
*
* @return array
*/
public function get_biografie_beruf() {
	// Read accessor: hands back $this->biografie_beruf unchanged.
	return $this->biografie_beruf;
}
/**
*
* @param array $biografie_beruf
* Numerically indexed array containing the occupation or occupations
*/
public function set_biografie_beruf( $biografie_beruf ) {
	// Replace $this->biografie_beruf (no type hint is enforced here), then
	// return $this for chaining.
	$this->biografie_beruf = $biografie_beruf;

	return $this;
}
/**
* Get Variable $url
*
* @return string
*/
public function get_url() {
	// Read accessor: hands back $this->url unchanged.
	return $this->url;
}
/**
* Set Variable $url
*
* @param string $url
*/
public function set_url( $url ) {
	// Replace $this->url, then return $this for chaining.
	$this->url = $url;

	return $this;
}
/**
*
* @return string
*/
public function get_profilbild() {
	// Read accessor: hands back $this->profilbild unchanged.
	return $this->profilbild;
}
/**
*
* @param string $profilbild
*/
public function set_profilbild( $profilbild ) {
	// Replace $this->profilbild, then return $this for chaining.
	$this->profilbild = $profilbild;

	return $this;
}
}
Leave a Reply