MLEB/Translate/webservices/CaighdeanWebService.php


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

<?php
/**
 * Contains a class for querying an external translation service.
 *
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Implements support for the Caighdean translator API.
 * @see https://github.com/kscanne/caighdean/blob/master/API.md
 * @ingroup TranslationWebService
 * @since 2017.04
 */
class CaighdeanWebService extends TranslationWebService {
	/**
	 * Returns the type identifier of this service.
	 *
	 * @return string Always 'mt' (presumably "machine translation").
	 */
	public function getType() {
		return 'mt';
	}

	/**
	 * Maps a language code to the code used when talking to the service.
	 *
	 * @param string $code Language code.
	 * @return string The given code, unchanged.
	 */
	public function mapCode( $code ) {
		return $code;
	}

	/**
	 * Returns the hard-coded list of supported language pairs.
	 *
	 * @return array Nested map of language codes: 'gd' and 'gv' each map to [ 'ga' => true ].
	 */
	protected function doPairs() {
		$pairs = [
			'gd' => [ 'ga' => true ],
			'gv' => [ 'ga' => true ],
		];

		return $pairs;
	}

	/**
	 * Builds the POST query for the given text.
	 *
	 * The trimmed input text is attached to the query as processing instructions so that
	 * parseResponse() can restore the original whitespace between tokens.
	 *
	 * @param string $text Text to send to the service; surrounding whitespace is trimmed.
	 * @param string $from Language code sent as the `foinse` parameter.
	 * @param string $to Not used when building the request.
	 * @return TranslationQuery The query as returned by the TranslationQuery builder chain.
	 * @throws TranslationWebServiceConfigurationException If `url` is missing from the configuration.
	 * @throws TranslationWebServiceInvalidInputException If the text is empty or the encoded
	 *  payload exceeds 16000 bytes.
	 */
	protected function getQuery( $text, $from, $to ) {
		if ( !isset( $this->config['url'] ) ) {
			throw new TranslationWebServiceConfigurationException( '`url` not set in configuration' );
		}

		$text = trim( $text );
		if ( $text === '' ) {
			throw new TranslationWebServiceInvalidInputException( 'Input is empty' );
		}

		$data = wfArrayToCgi( [
			'foinse' => $from,
			'teacs' => $text,
		] );

		// Maximum payload is 16 KiB. Based on testing, 16000 bytes is safe as it leaves 224
		// bytes for other things.
		if ( strlen( $data ) > 16000 ) {
			throw new TranslationWebServiceInvalidInputException( 'Input is over 16000 bytes long' );
		}

		return TranslationQuery::factory( $this->config['url'] )
			->timeout( $this->config['timeout'] )
			->postWithData( $data )
			->attachProcessingInstructions( $text );
	}

	/**
	 * Converts the service response into translated text.
	 *
	 * The response body is expected to be a JSON array of [ source token, target token ]
	 * pairs. The target tokens are joined together, reusing the whitespace that followed
	 * each source token in the original text where the token can be located, and a single
	 * space otherwise.
	 *
	 * @param TranslationQueryResponse $reply Response whose query carries the original text
	 *  as processing instructions.
	 * @return string The assembled translation.
	 * @throws TranslationWebServiceException If the body is not a JSON array, or any entry
	 *  is not a pair of strings.
	 */
	protected function parseResponse( TranslationQueryResponse $reply ) {
		$body = $reply->getBody();
		$response = FormatJson::decode( $body );
		if ( !is_array( $response ) ) {
			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
		}

		$text = '';
		$originalText = $reply->getQuery()->getProcessingInstructions();
		foreach ( $response as $pair ) {
			// Reject malformed entries up front instead of letting list() produce notices
			// and null tokens that would silently corrupt the output.
			if (
				!is_array( $pair ) ||
				!isset( $pair[0], $pair[1] ) ||
				!is_string( $pair[0] ) ||
				!is_string( $pair[1] )
			) {
				throw new TranslationWebServiceException(
					'Unexpected response format: ' . serialize( $body )
				);
			}
			list( $sourceToken, $targetToken ) = $pair;

			$separator = ' ';
			// An empty needle makes strpos() warn on PHP < 8 and return 0 on PHP 8. Treat it
			// consistently as "not found" so that the default separator is used.
			$pos = $sourceToken === '' ? false : strpos( $originalText, $sourceToken );
			// Try to keep the effects local. If we fail to match a token, we could accidentally
			// scan very far ahead in the text, find a false match and not find matches for all
			// of the tokens in between.
			if ( $pos !== false && $pos < 50 ) {
				// Remove the portion of text we have processed. $pos should be zero, unless
				// we failed to match something earlier.
				$originalText = substr( $originalText, $pos + strlen( $sourceToken ) );
				if ( preg_match( '/^\s+/', $originalText, $match ) ) {
					// Reuse the exact whitespace that followed the token in the original text.
					$separator = $match[ 0 ];
					$originalText = substr( $originalText, strlen( $separator ) );
				} else {
					// The next token followed immediately (for example punctuation).
					$separator = '';
				}
			}

			$text .= $targetToken . $separator;
		}

		return $text;
	}
}